The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/ip_carp.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
    3  * Copyright (c) 2003 Ryan McBride. All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  *
   14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   17  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
   18  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
   19  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
   20  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
   22  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
   23  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
   24  * THE POSSIBILITY OF SUCH DAMAGE.
   25  */
   26 
   27 #include <sys/cdefs.h>
   28 __FBSDID("$FreeBSD$");
   29 
   30 #include "opt_bpf.h"
   31 #include "opt_inet.h"
   32 #include "opt_inet6.h"
   33 
   34 #include <sys/types.h>
   35 #include <sys/param.h>
   36 #include <sys/systm.h>
   37 #include <sys/conf.h>
   38 #include <sys/kernel.h>
   39 #include <sys/limits.h>
   40 #include <sys/malloc.h>
   41 #include <sys/mbuf.h>
   42 #include <sys/module.h>
   43 #include <sys/time.h>
   44 #include <sys/priv.h>
   45 #include <sys/proc.h>
   46 #include <sys/protosw.h>
   47 #include <sys/sysctl.h>
   48 #include <sys/syslog.h>
   49 #include <sys/signalvar.h>
   50 #include <sys/filio.h>
   51 #include <sys/sockio.h>
   52 
   53 #include <sys/socket.h>
   54 #include <sys/vnode.h>
   55 
   56 #include <machine/stdarg.h>
   57 
   58 #include <net/bpf.h>
   59 #include <net/ethernet.h>
   60 #include <net/fddi.h>
   61 #include <net/iso88025.h>
   62 #include <net/if.h>
   63 #include <net/if_clone.h>
   64 #include <net/if_dl.h>
   65 #include <net/if_types.h>
   66 #include <net/route.h>
   67 #include <net/vnet.h>
   68 
   69 #ifdef INET
   70 #include <netinet/in.h>
   71 #include <netinet/in_var.h>
   72 #include <netinet/in_systm.h>
   73 #include <netinet/ip.h>
   74 #include <netinet/ip_var.h>
   75 #include <netinet/if_ether.h>
   76 #include <machine/in_cksum.h>
   77 #endif
   78 
   79 #ifdef INET6
   80 #include <netinet/icmp6.h>
   81 #include <netinet/ip6.h>
   82 #include <netinet6/ip6protosw.h>
   83 #include <netinet6/ip6_var.h>
   84 #include <netinet6/scope6_var.h>
   85 #include <netinet6/nd6.h>
   86 #endif
   87 
   88 #include <crypto/sha1.h>
   89 #include <netinet/ip_carp.h>
   90 
   91 #define CARP_IFNAME     "carp"
   92 static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces");
   93 SYSCTL_DECL(_net_inet_carp);
   94 
   95 struct carp_softc {
   96         struct ifnet            *sc_ifp;        /* Interface clue */
   97         struct ifnet            *sc_carpdev;    /* Pointer to parent interface */
   98         struct in_ifaddr        *sc_ia;         /* primary iface address */
   99         struct ip_moptions       sc_imo;
  100 #ifdef INET6
  101         struct in6_ifaddr       *sc_ia6;        /* primary iface address v6 */
  102         struct ip6_moptions      sc_im6o;
  103 #endif /* INET6 */
  104         TAILQ_ENTRY(carp_softc)  sc_list;
  105 
  106         enum { INIT = 0, BACKUP, MASTER }       sc_state;
  107 
  108         int                      sc_flags_backup;
  109         int                      sc_suppress;
  110 
  111         int                      sc_sendad_errors;
  112 #define CARP_SENDAD_MAX_ERRORS  3
  113         int                      sc_sendad_success;
  114 #define CARP_SENDAD_MIN_SUCCESS 3
  115 
  116         int                      sc_vhid;
  117         int                      sc_advskew;
  118         int                      sc_naddrs;
  119         int                      sc_naddrs6;
  120         int                      sc_advbase;    /* seconds */
  121         int                      sc_init_counter;
  122         u_int64_t                sc_counter;
  123 
  124         /* authentication */
  125 #define CARP_HMAC_PAD   64
  126         unsigned char sc_key[CARP_KEY_LEN];
  127         unsigned char sc_pad[CARP_HMAC_PAD];
  128         SHA1_CTX sc_sha1;
  129 
  130         struct callout           sc_ad_tmo;     /* advertisement timeout */
  131         struct callout           sc_md_tmo;     /* master down timeout */
  132         struct callout           sc_md6_tmo;    /* master down timeout */
  133         
  134         LIST_ENTRY(carp_softc)   sc_next;       /* Interface clue */
  135 };
  136 #define SC2IFP(sc)      ((sc)->sc_ifp)
  137 
  138 int carp_suppress_preempt = 0; /* counter; carpdetach() drops it once per suppress condition it clears, non-zero forces the 240 skew in carp_input_c() */
  139 int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 };    /* XXX for now; indexed by the CARPCTL_* ids used below */
  140 SYSCTL_NODE(_net_inet, IPPROTO_CARP,    carp,   CTLFLAG_RW, 0,  "CARP");
  141 SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
  142     &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
  143 SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
  144     &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
  145 SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
  146     &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
  147 SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
  148     &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");
  149 SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
  150     &carp_suppress_preempt, 0, "Preemption is suppressed");
  151 
  152 struct carpstats carpstats; /* exported below; presumably what CARPSTATS_INC() updates -- confirm in ip_carp.h */
  153 SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
  154     &carpstats, carpstats,
  155     "CARP statistics (struct carpstats, netinet/ip_carp.h)");
  156 
  157 struct carp_if { /* CARP state reached through a parent ifnet's if_carp pointer */
  158         TAILQ_HEAD(, carp_softc) vhif_vrs; /* carp_softc entries, linked through sc_list */
  159         int vhif_nvrs; /* entries on vhif_vrs; carpdetach() frees the carp_if when it reaches 0 */
  160 
  161         struct ifnet    *vhif_ifp; /* presumably the parent interface -- confirm against the code that sets it */
  162         struct mtx       vhif_mtx; /* mutex behind the CARP_LOCK*() / CARP_SCLOCK*() macros */
  163 };
  164 
  165 #define CARP_INET       0 /* presumably an index into proto_reg[] -- confirm at the use sites */
  166 #define CARP_INET6      1 /* presumably an index into proto_reg[] -- confirm at the use sites */
  167 static int proto_reg[] = {-1, -1};
  168 
  169 /* Get carp_if from softc. Valid after carp_set_addr{,6}. Dereferences sc_carpdev, so that must be non-NULL. */
  170 #define SC2CIF(sc)              ((struct carp_if *)(sc)->sc_carpdev->if_carp)
  171 
  172 /* lock per carp_if queue: one MTX_DEF mutex (vhif_mtx) in each carp_if */
  173 #define CARP_LOCK_INIT(cif)     mtx_init(&(cif)->vhif_mtx, "carp_if",   \
  174         NULL, MTX_DEF)
  175 #define CARP_LOCK_DESTROY(cif)  mtx_destroy(&(cif)->vhif_mtx)
  176 #define CARP_LOCK_ASSERT(cif)   mtx_assert(&(cif)->vhif_mtx, MA_OWNED)
  177 #define CARP_LOCK(cif)          mtx_lock(&(cif)->vhif_mtx)
  178 #define CARP_UNLOCK(cif)        mtx_unlock(&(cif)->vhif_mtx)
  179 /* Same mutex, addressed from a softc through SC2CIF(); only usable while sc_carpdev is set. */
  180 #define CARP_SCLOCK(sc)         mtx_lock(&SC2CIF(sc)->vhif_mtx)
  181 #define CARP_SCUNLOCK(sc)       mtx_unlock(&SC2CIF(sc)->vhif_mtx)
  182 #define CARP_SCLOCK_ASSERT(sc)  mtx_assert(&SC2CIF(sc)->vhif_mtx, MA_OWNED)
  183 /* CARP_LOG: log at LOG_INFO when the net.inet.carp.log value (carp_opts[CARPCTL_LOG]) is above 0. */
  184 #define CARP_LOG(...)   do {                            \
  185         if (carp_opts[CARPCTL_LOG] > 0)                 \
  186                 log(LOG_INFO, __VA_ARGS__);             \
  187 } while (0)
  188 /* CARP_DEBUG: log at LOG_DEBUG, only when carp_opts[CARPCTL_LOG] is above 1. */
  189 #define CARP_DEBUG(...) do {                            \
  190         if (carp_opts[CARPCTL_LOG] > 1)                 \
  191                 log(LOG_DEBUG, __VA_ARGS__);            \
  192 } while (0)
  193 
  194 static void     carp_hmac_prepare(struct carp_softc *);
  195 static void     carp_hmac_generate(struct carp_softc *, u_int32_t *,
  196                     unsigned char *);
  197 static int      carp_hmac_verify(struct carp_softc *, u_int32_t *,
  198                     unsigned char *);
  199 static void     carp_setroute(struct carp_softc *, int);
  200 static void     carp_input_c(struct mbuf *, struct carp_header *, sa_family_t);
  201 static int      carp_clone_create(struct if_clone *, int, caddr_t);
  202 static void     carp_clone_destroy(struct ifnet *);
  203 static void     carpdetach(struct carp_softc *, int);
  204 static int      carp_prepare_ad(struct mbuf *, struct carp_softc *,
  205                     struct carp_header *);
  206 static void     carp_send_ad_all(void);
  207 static void     carp_send_ad(void *);
  208 static void     carp_send_ad_locked(struct carp_softc *);
  209 static void     carp_send_arp(struct carp_softc *);
  210 static void     carp_master_down(void *);
  211 static void     carp_master_down_locked(struct carp_softc *);
  212 static int      carp_ioctl(struct ifnet *, u_long, caddr_t);
  213 static int      carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *,
  214                     struct route *);
  215 static void     carp_start(struct ifnet *);
  216 static void     carp_setrun(struct carp_softc *, sa_family_t);
  217 static void     carp_set_state(struct carp_softc *, int);
  218 static int      carp_addrcount(struct carp_if *, struct in_ifaddr *, int);
  219 enum    { CARP_COUNT_MASTER, CARP_COUNT_RUNNING };
  220 
  221 static void     carp_multicast_cleanup(struct carp_softc *, int dofree);
  222 static int      carp_set_addr(struct carp_softc *, struct sockaddr_in *);
  223 static int      carp_del_addr(struct carp_softc *, struct sockaddr_in *);
  224 static void     carp_carpdev_state_locked(struct carp_if *);
  225 static void     carp_sc_state_locked(struct carp_softc *);
  226 #ifdef INET6
  227 static void     carp_send_na(struct carp_softc *);
  228 static int      carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
  229 static int      carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *);
  230 static void     carp_multicast6_cleanup(struct carp_softc *, int dofree);
  231 #endif
  232 
  233 static LIST_HEAD(, carp_softc) carpif_list;
  234 static struct mtx carp_mtx;
  235 IFC_SIMPLE_DECLARE(carp, 0);
  236 
  237 static eventhandler_tag if_detach_event_tag;
  238 
  239 static __inline u_int16_t
  240 carp_cksum(struct mbuf *m, int len)
  241 {
  242         return (in_cksum(m, len));
  243 }
  244 
  245 static void
  246 carp_hmac_prepare(struct carp_softc *sc)
  247 {
  248         u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
  249         u_int8_t vhid = sc->sc_vhid & 0xff;
  250         struct ifaddr *ifa;
  251         int i, found;
  252 #ifdef INET
  253         struct in_addr last, cur, in;
  254 #endif
  255 #ifdef INET6
  256         struct in6_addr last6, cur6, in6;
  257 #endif
  258 
  259         if (sc->sc_carpdev)
  260                 CARP_SCLOCK(sc);
  261 
  262         /* XXX: possible race here */
  263 
  264         /* compute ipad from key */
  265         bzero(sc->sc_pad, sizeof(sc->sc_pad));
  266         bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
  267         for (i = 0; i < sizeof(sc->sc_pad); i++)
  268                 sc->sc_pad[i] ^= 0x36;
  269 
  270         /* precompute first part of inner hash */
  271         SHA1Init(&sc->sc_sha1);
  272         SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
  273         SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
  274         SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
  275         SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
  276 #ifdef INET
  277         cur.s_addr = 0;
  278         do {
  279                 found = 0;
  280                 last = cur;
  281                 cur.s_addr = 0xffffffff;
  282                 IF_ADDR_RLOCK(SC2IFP(sc));
  283                 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
  284                         in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
  285                         if (ifa->ifa_addr->sa_family == AF_INET &&
  286                             ntohl(in.s_addr) > ntohl(last.s_addr) &&
  287                             ntohl(in.s_addr) < ntohl(cur.s_addr)) {
  288                                 cur.s_addr = in.s_addr;
  289                                 found++;
  290                         }
  291                 }
  292                 IF_ADDR_RUNLOCK(SC2IFP(sc));
  293                 if (found)
  294                         SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur));
  295         } while (found);
  296 #endif /* INET */
  297 #ifdef INET6
  298         memset(&cur6, 0, sizeof(cur6));
  299         do {
  300                 found = 0;
  301                 last6 = cur6;
  302                 memset(&cur6, 0xff, sizeof(cur6));
  303                 IF_ADDR_RLOCK(SC2IFP(sc));
  304                 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
  305                         in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
  306                         if (IN6_IS_SCOPE_EMBED(&in6))
  307                                 in6.s6_addr16[1] = 0;
  308                         if (ifa->ifa_addr->sa_family == AF_INET6 &&
  309                             memcmp(&in6, &last6, sizeof(in6)) > 0 &&
  310                             memcmp(&in6, &cur6, sizeof(in6)) < 0) {
  311                                 cur6 = in6;
  312                                 found++;
  313                         }
  314                 }
  315                 IF_ADDR_RUNLOCK(SC2IFP(sc));
  316                 if (found)
  317                         SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6));
  318         } while (found);
  319 #endif /* INET6 */
  320 
  321         /* convert ipad to opad */
  322         for (i = 0; i < sizeof(sc->sc_pad); i++)
  323                 sc->sc_pad[i] ^= 0x36 ^ 0x5c;
  324 
  325         if (sc->sc_carpdev)
  326                 CARP_SCUNLOCK(sc);
  327 }
  328 
  329 static void
  330 carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2],
  331     unsigned char md[20])
  332 {
  333         SHA1_CTX sha1ctx;
  334 
  335         /* fetch first half of inner hash */
  336         bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
  337 
  338         SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
  339         SHA1Final(md, &sha1ctx);
  340 
  341         /* outer hash */
  342         SHA1Init(&sha1ctx);
  343         SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
  344         SHA1Update(&sha1ctx, md, 20);
  345         SHA1Final(md, &sha1ctx);
  346 }
  347 
  348 static int
  349 carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2],
  350     unsigned char md[20])
  351 {
  352         unsigned char md2[20];
  353 
  354         CARP_SCLOCK_ASSERT(sc);
  355 
  356         carp_hmac_generate(sc, counter, md2);
  357 
  358         return (bcmp(md, md2, sizeof(md2)));
  359 }
  360 
  361 static void
  362 carp_setroute(struct carp_softc *sc, int cmd)
  363 {
  364         struct ifaddr *ifa;
  365         int s;
  366 
  367         if (sc->sc_carpdev)
  368                 CARP_SCLOCK_ASSERT(sc);
  369 
  370         s = splnet();
  371         TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
  372                 if (ifa->ifa_addr->sa_family == AF_INET &&
  373                     sc->sc_carpdev != NULL) {
  374                         int count = carp_addrcount(
  375                             (struct carp_if *)sc->sc_carpdev->if_carp,
  376                             ifatoia(ifa), CARP_COUNT_MASTER);
  377 
  378                         if ((cmd == RTM_ADD && count == 1) ||
  379                             (cmd == RTM_DELETE && count == 0))
  380                                 rtinit(ifa, cmd, RTF_UP | RTF_HOST);
  381                 }
  382         }
  383         splx(s);
  384 }
  385 
  386 static int
  387 carp_clone_create(struct if_clone *ifc, int unit, caddr_t params)
  388 {
  389 
  390         struct carp_softc *sc;
  391         struct ifnet *ifp;
  392 
  393         sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO);
  394         ifp = SC2IFP(sc) = if_alloc(IFT_ETHER);
  395         if (ifp == NULL) {
  396                 free(sc, M_CARP);
  397                 return (ENOSPC);
  398         }
  399         
  400         sc->sc_flags_backup = 0;
  401         sc->sc_suppress = 0;
  402         sc->sc_advbase = CARP_DFLTINTV;
  403         sc->sc_vhid = -1;       /* required setting */
  404         sc->sc_advskew = 0;
  405         sc->sc_init_counter = 1;
  406         sc->sc_naddrs = sc->sc_naddrs6 = 0; /* M_ZERO? */
  407         sc->sc_imo.imo_membership = (struct in_multi **)malloc(
  408             (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP,
  409             M_WAITOK);
  410         sc->sc_imo.imo_mfilters = NULL;
  411         sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
  412         sc->sc_imo.imo_multicast_vif = -1;
  413 #ifdef INET6
  414         sc->sc_im6o.im6o_membership = (struct in6_multi **)malloc(
  415             (sizeof(struct in6_multi *) * IPV6_MIN_MEMBERSHIPS), M_CARP,
  416             M_WAITOK);
  417         sc->sc_im6o.im6o_mfilters = NULL;
  418         sc->sc_im6o.im6o_max_memberships = IPV6_MIN_MEMBERSHIPS;
  419         sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
  420 #endif
  421 
  422         callout_init(&sc->sc_ad_tmo, CALLOUT_MPSAFE);
  423         callout_init(&sc->sc_md_tmo, CALLOUT_MPSAFE);
  424         callout_init(&sc->sc_md6_tmo, CALLOUT_MPSAFE);
  425         
  426         ifp->if_softc = sc;
  427         if_initname(ifp, CARP_IFNAME, unit);
  428         ifp->if_mtu = ETHERMTU;
  429         ifp->if_flags = IFF_LOOPBACK;
  430         ifp->if_ioctl = carp_ioctl;
  431         ifp->if_output = carp_looutput;
  432         ifp->if_start = carp_start;
  433         ifp->if_type = IFT_CARP;
  434         ifp->if_snd.ifq_maxlen = ifqmaxlen;
  435         ifp->if_hdrlen = 0;
  436         if_attach(ifp);
  437         bpfattach(SC2IFP(sc), DLT_NULL, sizeof(u_int32_t));
  438         mtx_lock(&carp_mtx);
  439         LIST_INSERT_HEAD(&carpif_list, sc, sc_next);
  440         mtx_unlock(&carp_mtx);
  441         return (0);
  442 }
  443 
  444 static void
  445 carp_clone_destroy(struct ifnet *ifp)
  446 {
  447         struct carp_softc *sc = ifp->if_softc;
  448 
  449         if (sc->sc_carpdev)
  450                 CARP_SCLOCK(sc);
  451         carpdetach(sc, 1);      /* Returns unlocked. */
  452 
  453         mtx_lock(&carp_mtx);
  454         LIST_REMOVE(sc, sc_next);
  455         mtx_unlock(&carp_mtx);
  456         bpfdetach(ifp);
  457         if_detach(ifp);
  458         if_free_type(ifp, IFT_ETHER);
  459         free(sc->sc_imo.imo_membership, M_CARP);
  460 #ifdef INET6
  461         free(sc->sc_im6o.im6o_membership, M_CARP);
  462 #endif
  463         free(sc, M_CARP);
  464 }
  465 
  466 /*
  467  * This function can be called on CARP interface destroy path,
  468  * and in case of the removal of the underlying interface as
  469  * well. We differentiate these two cases: in case of destruction
  470  * of the underlying interface, we do not cleanup our multicast
  471  * memberships, since they are already freed. But we purge pointers
  472  * to multicast structures, since they are no longer valid, to
  473  * avoid panic in future calls to carpdetach(). Also, we do not
  474  * release the lock on return, because the function will be
  475  * called once more, for another CARP instance on the same
  476  * interface.
  477  */
  478 static void
  479 carpdetach(struct carp_softc *sc, int unlock)
  480 {
  481         struct carp_if *cif;
  482 
  483         callout_stop(&sc->sc_ad_tmo);
  484         callout_stop(&sc->sc_md_tmo);
  485         callout_stop(&sc->sc_md6_tmo);
  486 
  487         if (sc->sc_suppress)
  488                 carp_suppress_preempt--;
  489         sc->sc_suppress = 0;
  490 
  491         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
  492                 carp_suppress_preempt--;
  493         sc->sc_sendad_errors = 0;
  494 
  495         carp_set_state(sc, INIT);
  496         SC2IFP(sc)->if_flags &= ~IFF_UP;
  497         carp_setrun(sc, 0);
  498         carp_multicast_cleanup(sc, unlock);
  499 #ifdef INET6
  500         carp_multicast6_cleanup(sc, unlock);
  501 #endif
  502 
  503         if (sc->sc_carpdev != NULL) {
  504                 cif = (struct carp_if *)sc->sc_carpdev->if_carp;
  505                 CARP_LOCK_ASSERT(cif);
  506                 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
  507                 if (!--cif->vhif_nvrs) {
  508                         ifpromisc(sc->sc_carpdev, 0);
  509                         sc->sc_carpdev->if_carp = NULL;
  510                         CARP_LOCK_DESTROY(cif);
  511                         free(cif, M_CARP);
  512                 } else if (unlock)
  513                         CARP_UNLOCK(cif);
  514                 sc->sc_carpdev = NULL;
  515         }
  516 }
  517 
  518 /* Detach an interface from the carp. */
  519 static void
  520 carp_ifdetach(void *arg __unused, struct ifnet *ifp)
  521 {
  522         struct carp_if *cif = (struct carp_if *)ifp->if_carp;
  523         struct carp_softc *sc, *nextsc;
  524 
  525         if (cif == NULL)
  526                 return;
  527 
  528         /*
  529          * XXX: At the end of for() cycle the lock will be destroyed.
  530          */
  531         CARP_LOCK(cif);
  532         for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) {
  533                 nextsc = TAILQ_NEXT(sc, sc_list);
  534                 carpdetach(sc, 0);
  535         }
  536 }
  537 
  538 /*
  539  * process input packet.
  540  * we have rearranged checks order compared to the rfc,
  541  * but it seems more efficient this way or not possible otherwise.
  542  */
  543 void
  544 carp_input(struct mbuf *m, int hlen)
  545 {
  546         struct ip *ip = mtod(m, struct ip *);
  547         struct carp_header *ch;
  548         int iplen, len;
  549 
  550         CARPSTATS_INC(carps_ipackets);
  551 
  552         if (!carp_opts[CARPCTL_ALLOW]) {
  553                 m_freem(m);
  554                 return;
  555         }
  556 
  557         /* check if received on a valid carp interface */
  558         if (m->m_pkthdr.rcvif->if_carp == NULL) {
  559                 CARPSTATS_INC(carps_badif);
  560                 CARP_DEBUG("carp_input: packet received on non-carp "
  561                     "interface: %s\n",
  562                     m->m_pkthdr.rcvif->if_xname);
  563                 m_freem(m);
  564                 return;
  565         }
  566 
  567         /* verify that the IP TTL is 255.  */
  568         if (ip->ip_ttl != CARP_DFLTTL) {
  569                 CARPSTATS_INC(carps_badttl);
  570                 CARP_DEBUG("carp_input: received ttl %d != 255 on %s\n",
  571                     ip->ip_ttl,
  572                     m->m_pkthdr.rcvif->if_xname);
  573                 m_freem(m);
  574                 return;
  575         }
  576 
  577         iplen = ip->ip_hl << 2;
  578 
  579         if (m->m_pkthdr.len < iplen + sizeof(*ch)) {
  580                 CARPSTATS_INC(carps_badlen);
  581                 CARP_DEBUG("carp_input: received len %zd < "
  582                     "sizeof(struct carp_header) on %s\n",
  583                     m->m_len - sizeof(struct ip),
  584                     m->m_pkthdr.rcvif->if_xname);
  585                 m_freem(m);
  586                 return;
  587         }
  588 
  589         if (iplen + sizeof(*ch) < m->m_len) {
  590                 if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) {
  591                         CARPSTATS_INC(carps_hdrops);
  592                         CARP_DEBUG("carp_input: pullup failed\n");
  593                         return;
  594                 }
  595                 ip = mtod(m, struct ip *);
  596         }
  597         ch = (struct carp_header *)((char *)ip + iplen);
  598 
  599         /*
  600          * verify that the received packet length is
  601          * equal to the CARP header
  602          */
  603         len = iplen + sizeof(*ch);
  604         if (len > m->m_pkthdr.len) {
  605                 CARPSTATS_INC(carps_badlen);
  606                 CARP_DEBUG("carp_input: packet too short %d on %s\n",
  607                     m->m_pkthdr.len,
  608                     m->m_pkthdr.rcvif->if_xname);
  609                 m_freem(m);
  610                 return;
  611         }
  612 
  613         if ((m = m_pullup(m, len)) == NULL) {
  614                 CARPSTATS_INC(carps_hdrops);
  615                 return;
  616         }
  617         ip = mtod(m, struct ip *);
  618         ch = (struct carp_header *)((char *)ip + iplen);
  619 
  620         /* verify the CARP checksum */
  621         m->m_data += iplen;
  622         if (carp_cksum(m, len - iplen)) {
  623                 CARPSTATS_INC(carps_badsum);
  624                 CARP_DEBUG("carp_input: checksum failed on %s\n",
  625                     m->m_pkthdr.rcvif->if_xname);
  626                 m_freem(m);
  627                 return;
  628         }
  629         m->m_data -= iplen;
  630 
  631         carp_input_c(m, ch, AF_INET);
  632 }
  633 
  634 #ifdef INET6
  635 int
  636 carp6_input(struct mbuf **mp, int *offp, int proto)
  637 {
  638         struct mbuf *m = *mp;
  639         struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
  640         struct carp_header *ch;
  641         u_int len;
  642 
  643         CARPSTATS_INC(carps_ipackets6);
  644 
  645         if (!carp_opts[CARPCTL_ALLOW]) {
  646                 m_freem(m);
  647                 return (IPPROTO_DONE);
  648         }
  649 
  650         /* check if received on a valid carp interface */
  651         if (m->m_pkthdr.rcvif->if_carp == NULL) {
  652                 CARPSTATS_INC(carps_badif);
  653                 CARP_DEBUG("carp6_input: packet received on non-carp "
  654                     "interface: %s\n",
  655                     m->m_pkthdr.rcvif->if_xname);
  656                 m_freem(m);
  657                 return (IPPROTO_DONE);
  658         }
  659 
  660         /* verify that the IP TTL is 255 */
  661         if (ip6->ip6_hlim != CARP_DFLTTL) {
  662                 CARPSTATS_INC(carps_badttl);
  663                 CARP_DEBUG("carp6_input: received ttl %d != 255 on %s\n",
  664                     ip6->ip6_hlim,
  665                     m->m_pkthdr.rcvif->if_xname);
  666                 m_freem(m);
  667                 return (IPPROTO_DONE);
  668         }
  669 
  670         /* verify that we have a complete carp packet */
  671         len = m->m_len;
  672         IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
  673         if (ch == NULL) {
  674                 CARPSTATS_INC(carps_badlen);
  675                 CARP_DEBUG("carp6_input: packet size %u too small\n", len);
  676                 return (IPPROTO_DONE);
  677         }
  678 
  679 
  680         /* verify the CARP checksum */
  681         m->m_data += *offp;
  682         if (carp_cksum(m, sizeof(*ch))) {
  683                 CARPSTATS_INC(carps_badsum);
  684                 CARP_DEBUG("carp6_input: checksum failed, on %s\n",
  685                     m->m_pkthdr.rcvif->if_xname);
  686                 m_freem(m);
  687                 return (IPPROTO_DONE);
  688         }
  689         m->m_data -= *offp;
  690 
  691         carp_input_c(m, ch, AF_INET6);
  692         return (IPPROTO_DONE);
  693 }
  694 #endif /* INET6 */
  695 
  696 static void
  697 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
  698 {
  699         struct ifnet *ifp = m->m_pkthdr.rcvif;
  700         struct carp_softc *sc;
  701         u_int64_t tmp_counter;
  702         struct timeval sc_tv, ch_tv;
  703 
  704         /* verify that the VHID is valid on the receiving interface */
  705         CARP_LOCK(ifp->if_carp);
  706         TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list)
  707                 if (sc->sc_vhid == ch->carp_vhid)
  708                         break;
  709 
  710         if (!sc || !((SC2IFP(sc)->if_flags & IFF_UP) &&
  711             (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) {
  712                 CARPSTATS_INC(carps_badvhid);
  713                 CARP_UNLOCK(ifp->if_carp);
  714                 m_freem(m);
  715                 return;
  716         }
  717 
  718         getmicrotime(&SC2IFP(sc)->if_lastchange);
  719         SC2IFP(sc)->if_ipackets++;
  720         SC2IFP(sc)->if_ibytes += m->m_pkthdr.len;
  721 
  722         if (bpf_peers_present(SC2IFP(sc)->if_bpf)) {
  723                 struct ip *ip = mtod(m, struct ip *);
  724                 uint32_t af1 = af;
  725 
  726                 /* BPF wants net byte order */
  727                 ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2));
  728                 ip->ip_off = htons(ip->ip_off);
  729                 bpf_mtap2(SC2IFP(sc)->if_bpf, &af1, sizeof(af1), m);
  730         }
  731 
  732         /* verify the CARP version. */
  733         if (ch->carp_version != CARP_VERSION) {
  734                 CARPSTATS_INC(carps_badver);
  735                 SC2IFP(sc)->if_ierrors++;
  736                 CARP_UNLOCK(ifp->if_carp);
  737                 CARP_DEBUG("%s; invalid version %d\n",
  738                     SC2IFP(sc)->if_xname,
  739                     ch->carp_version);
  740                 m_freem(m);
  741                 return;
  742         }
  743 
  744         /* verify the hash */
  745         if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
  746                 CARPSTATS_INC(carps_badauth);
  747                 SC2IFP(sc)->if_ierrors++;
  748                 CARP_UNLOCK(ifp->if_carp);
  749                 CARP_DEBUG("%s: incorrect hash\n", SC2IFP(sc)->if_xname);
  750                 m_freem(m);
  751                 return;
  752         }
  753 
  754         tmp_counter = ntohl(ch->carp_counter[0]);
  755         tmp_counter = tmp_counter<<32;
  756         tmp_counter += ntohl(ch->carp_counter[1]);
  757 
  758         /* XXX Replay protection goes here */
  759 
  760         sc->sc_init_counter = 0;
  761         sc->sc_counter = tmp_counter;
  762 
  763         sc_tv.tv_sec = sc->sc_advbase;
  764         if (carp_suppress_preempt && sc->sc_advskew <  240)
  765                 sc_tv.tv_usec = 240 * 1000000 / 256;
  766         else
  767                 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
  768         ch_tv.tv_sec = ch->carp_advbase;
  769         ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
  770 
  771         switch (sc->sc_state) {
  772         case INIT:
  773                 break;
  774         case MASTER:
  775                 /*
  776                  * If we receive an advertisement from a master who's going to
  777                  * be more frequent than us, go into BACKUP state.
  778                  */
  779                 if (timevalcmp(&sc_tv, &ch_tv, >) ||
  780                     timevalcmp(&sc_tv, &ch_tv, ==)) {
  781                         callout_stop(&sc->sc_ad_tmo);
  782                         CARP_LOG("%s: MASTER -> BACKUP "
  783                            "(more frequent advertisement received)\n",
  784                            SC2IFP(sc)->if_xname);
  785                         carp_set_state(sc, BACKUP);
  786                         carp_setrun(sc, 0);
  787                         carp_setroute(sc, RTM_DELETE);
  788                 }
  789                 break;
  790         case BACKUP:
  791                 /*
  792                  * If we're pre-empting masters who advertise slower than us,
  793                  * and this one claims to be slower, treat him as down.
  794                  */
  795                 if (carp_opts[CARPCTL_PREEMPT] &&
  796                     timevalcmp(&sc_tv, &ch_tv, <)) {
  797                         CARP_LOG("%s: BACKUP -> MASTER "
  798                             "(preempting a slower master)\n",
  799                             SC2IFP(sc)->if_xname);
  800                         carp_master_down_locked(sc);
  801                         break;
  802                 }
  803 
  804                 /*
  805                  *  If the master is going to advertise at such a low frequency
  806                  *  that he's guaranteed to time out, we'd might as well just
  807                  *  treat him as timed out now.
  808                  */
  809                 sc_tv.tv_sec = sc->sc_advbase * 3;
  810                 if (timevalcmp(&sc_tv, &ch_tv, <)) {
  811                         CARP_LOG("%s: BACKUP -> MASTER "
  812                             "(master timed out)\n",
  813                             SC2IFP(sc)->if_xname);
  814                         carp_master_down_locked(sc);
  815                         break;
  816                 }
  817 
  818                 /*
  819                  * Otherwise, we reset the counter and wait for the next
  820                  * advertisement.
  821                  */
  822                 carp_setrun(sc, af);
  823                 break;
  824         }
  825 
  826         CARP_UNLOCK(ifp->if_carp);
  827 
  828         m_freem(m);
  829         return;
  830 }
  831 
  832 static int
  833 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
  834 {
  835         struct m_tag *mtag;
  836         struct ifnet *ifp = SC2IFP(sc);
  837 
  838         if (sc->sc_init_counter) {
  839                 /* this could also be seconds since unix epoch */
  840                 sc->sc_counter = arc4random();
  841                 sc->sc_counter = sc->sc_counter << 32;
  842                 sc->sc_counter += arc4random();
  843         } else
  844                 sc->sc_counter++;
  845 
  846         ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
  847         ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
  848 
  849         carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
  850 
  851         /* Tag packet for carp_output */
  852         mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), M_NOWAIT);
  853         if (mtag == NULL) {
  854                 m_freem(m);
  855                 SC2IFP(sc)->if_oerrors++;
  856                 return (ENOMEM);
  857         }
  858         bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *));
  859         m_tag_prepend(m, mtag);
  860 
  861         return (0);
  862 }
  863 
  864 static void
  865 carp_send_ad_all(void)
  866 {
  867         struct carp_softc *sc;
  868 
  869         mtx_lock(&carp_mtx);
  870         LIST_FOREACH(sc, &carpif_list, sc_next) {
  871                 if (sc->sc_carpdev == NULL)
  872                         continue;
  873                 CARP_SCLOCK(sc);
  874                 if ((SC2IFP(sc)->if_flags & IFF_UP) &&
  875                     (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING) &&
  876                      sc->sc_state == MASTER)
  877                         carp_send_ad_locked(sc);
  878                 CARP_SCUNLOCK(sc);
  879         }
  880         mtx_unlock(&carp_mtx);
  881 }
  882 
  883 static void
  884 carp_send_ad(void *v)
  885 {
  886         struct carp_softc *sc = v;
  887 
  888         CARP_SCLOCK(sc);
  889         carp_send_ad_locked(sc);
  890         CARP_SCUNLOCK(sc);
  891 }
  892 
  893 static void
  894 carp_send_ad_locked(struct carp_softc *sc)
  895 {
  896         struct carp_header ch;
  897         struct timeval tv;
  898         struct carp_header *ch_ptr;
  899         struct mbuf *m;
  900         int len, advbase, advskew;
  901 
  902         CARP_SCLOCK_ASSERT(sc);
  903 
  904         /* bow out if we've lost our UPness or RUNNINGuiness */
  905         if (!((SC2IFP(sc)->if_flags & IFF_UP) &&
  906             (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) {
  907                 advbase = 255;
  908                 advskew = 255;
  909         } else {
  910                 advbase = sc->sc_advbase;
  911                 if (!carp_suppress_preempt || sc->sc_advskew > 240)
  912                         advskew = sc->sc_advskew;
  913                 else
  914                         advskew = 240;
  915                 tv.tv_sec = advbase;
  916                 tv.tv_usec = advskew * 1000000 / 256;
  917         }
  918 
  919         ch.carp_version = CARP_VERSION;
  920         ch.carp_type = CARP_ADVERTISEMENT;
  921         ch.carp_vhid = sc->sc_vhid;
  922         ch.carp_advbase = advbase;
  923         ch.carp_advskew = advskew;
  924         ch.carp_authlen = 7;    /* XXX DEFINE */
  925         ch.carp_pad1 = 0;       /* must be zero */
  926         ch.carp_cksum = 0;
  927 
  928 #ifdef INET
  929         if (sc->sc_ia) {
  930                 struct ip *ip;
  931 
  932                 MGETHDR(m, M_DONTWAIT, MT_HEADER);
  933                 if (m == NULL) {
  934                         SC2IFP(sc)->if_oerrors++;
  935                         CARPSTATS_INC(carps_onomem);
  936                         /* XXX maybe less ? */
  937                         if (advbase != 255 || advskew != 255)
  938                                 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
  939                                     carp_send_ad, sc);
  940                         return;
  941                 }
  942                 len = sizeof(*ip) + sizeof(ch);
  943                 m->m_pkthdr.len = len;
  944                 m->m_pkthdr.rcvif = NULL;
  945                 m->m_len = len;
  946                 MH_ALIGN(m, m->m_len);
  947                 m->m_flags |= M_MCAST;
  948                 ip = mtod(m, struct ip *);
  949                 ip->ip_v = IPVERSION;
  950                 ip->ip_hl = sizeof(*ip) >> 2;
  951                 ip->ip_tos = IPTOS_LOWDELAY;
  952                 ip->ip_len = len;
  953                 ip->ip_id = ip_newid();
  954                 ip->ip_off = IP_DF;
  955                 ip->ip_ttl = CARP_DFLTTL;
  956                 ip->ip_p = IPPROTO_CARP;
  957                 ip->ip_sum = 0;
  958                 ip->ip_src.s_addr = sc->sc_ia->ia_addr.sin_addr.s_addr;
  959                 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
  960 
  961                 ch_ptr = (struct carp_header *)(&ip[1]);
  962                 bcopy(&ch, ch_ptr, sizeof(ch));
  963                 if (carp_prepare_ad(m, sc, ch_ptr))
  964                         return;
  965 
  966                 m->m_data += sizeof(*ip);
  967                 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
  968                 m->m_data -= sizeof(*ip);
  969 
  970                 getmicrotime(&SC2IFP(sc)->if_lastchange);
  971                 SC2IFP(sc)->if_opackets++;
  972                 SC2IFP(sc)->if_obytes += len;
  973                 CARPSTATS_INC(carps_opackets);
  974 
  975                 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) {
  976                         SC2IFP(sc)->if_oerrors++;
  977                         if (sc->sc_sendad_errors < INT_MAX)
  978                                 sc->sc_sendad_errors++;
  979                         if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
  980                                 carp_suppress_preempt++;
  981                                 if (carp_suppress_preempt == 1) {
  982                                         CARP_SCUNLOCK(sc);
  983                                         carp_send_ad_all();
  984                                         CARP_SCLOCK(sc);
  985                                 }
  986                         }
  987                         sc->sc_sendad_success = 0;
  988                 } else {
  989                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
  990                                 if (++sc->sc_sendad_success >=
  991                                     CARP_SENDAD_MIN_SUCCESS) {
  992                                         carp_suppress_preempt--;
  993                                         sc->sc_sendad_errors = 0;
  994                                 }
  995                         } else
  996                                 sc->sc_sendad_errors = 0;
  997                 }
  998         }
  999 #endif /* INET */
 1000 #ifdef INET6
 1001         if (sc->sc_ia6) {
 1002                 struct ip6_hdr *ip6;
 1003 
 1004                 MGETHDR(m, M_DONTWAIT, MT_HEADER);
 1005                 if (m == NULL) {
 1006                         SC2IFP(sc)->if_oerrors++;
 1007                         CARPSTATS_INC(carps_onomem);
 1008                         /* XXX maybe less ? */
 1009                         if (advbase != 255 || advskew != 255)
 1010                                 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
 1011                                     carp_send_ad, sc);
 1012                         return;
 1013                 }
 1014                 len = sizeof(*ip6) + sizeof(ch);
 1015                 m->m_pkthdr.len = len;
 1016                 m->m_pkthdr.rcvif = NULL;
 1017                 m->m_len = len;
 1018                 MH_ALIGN(m, m->m_len);
 1019                 m->m_flags |= M_MCAST;
 1020                 ip6 = mtod(m, struct ip6_hdr *);
 1021                 bzero(ip6, sizeof(*ip6));
 1022                 ip6->ip6_vfc |= IPV6_VERSION;
 1023                 ip6->ip6_hlim = CARP_DFLTTL;
 1024                 ip6->ip6_nxt = IPPROTO_CARP;
 1025                 bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src,
 1026                     sizeof(struct in6_addr));
 1027                 /* set the multicast destination */
 1028 
 1029                 ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
 1030                 ip6->ip6_dst.s6_addr8[15] = 0x12;
 1031                 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
 1032                         SC2IFP(sc)->if_oerrors++;
 1033                         m_freem(m);
 1034                         CARP_DEBUG("%s: in6_setscope failed\n", __func__);
 1035                         return;
 1036                 }
 1037 
 1038                 ch_ptr = (struct carp_header *)(&ip6[1]);
 1039                 bcopy(&ch, ch_ptr, sizeof(ch));
 1040                 if (carp_prepare_ad(m, sc, ch_ptr))
 1041                         return;
 1042 
 1043                 m->m_data += sizeof(*ip6);
 1044                 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6));
 1045                 m->m_data -= sizeof(*ip6);
 1046 
 1047                 getmicrotime(&SC2IFP(sc)->if_lastchange);
 1048                 SC2IFP(sc)->if_opackets++;
 1049                 SC2IFP(sc)->if_obytes += len;
 1050                 CARPSTATS_INC(carps_opackets6);
 1051 
 1052                 if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) {
 1053                         SC2IFP(sc)->if_oerrors++;
 1054                         if (sc->sc_sendad_errors < INT_MAX)
 1055                                 sc->sc_sendad_errors++;
 1056                         if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
 1057                                 carp_suppress_preempt++;
 1058                                 if (carp_suppress_preempt == 1) {
 1059                                         CARP_SCUNLOCK(sc);
 1060                                         carp_send_ad_all();
 1061                                         CARP_SCLOCK(sc);
 1062                                 }
 1063                         }
 1064                         sc->sc_sendad_success = 0;
 1065                 } else {
 1066                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
 1067                                 if (++sc->sc_sendad_success >=
 1068                                     CARP_SENDAD_MIN_SUCCESS) {
 1069                                         carp_suppress_preempt--;
 1070                                         sc->sc_sendad_errors = 0;
 1071                                 }
 1072                         } else
 1073                                 sc->sc_sendad_errors = 0;
 1074                 }
 1075         }
 1076 #endif /* INET6 */
 1077 
 1078         if (advbase != 255 || advskew != 255)
 1079                 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
 1080                     carp_send_ad, sc);
 1081 
 1082 }
 1083 
 1084 /*
 1085  * Broadcast a gratuitous ARP request containing
 1086  * the virtual router MAC address for each IP address
 1087  * associated with the virtual router.
 1088  */
 1089 static void
 1090 carp_send_arp(struct carp_softc *sc)
 1091 {
 1092         struct ifaddr *ifa;
 1093 
 1094         TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
 1095 
 1096                 if (ifa->ifa_addr->sa_family != AF_INET)
 1097                         continue;
 1098 
 1099 /*              arprequest(sc->sc_carpdev, &in, &in, IF_LLADDR(sc->sc_ifp)); */
 1100                 arp_ifinit2(sc->sc_carpdev, ifa, IF_LLADDR(sc->sc_ifp));
 1101 
 1102                 DELAY(1000);    /* XXX */
 1103         }
 1104 }
 1105 
 1106 #ifdef INET6
 1107 static void
 1108 carp_send_na(struct carp_softc *sc)
 1109 {
 1110         struct ifaddr *ifa;
 1111         struct in6_addr *in6;
 1112         static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
 1113 
 1114         TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
 1115 
 1116                 if (ifa->ifa_addr->sa_family != AF_INET6)
 1117                         continue;
 1118 
 1119                 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
 1120                 nd6_na_output(sc->sc_carpdev, &mcast, in6,
 1121                     ND_NA_FLAG_OVERRIDE, 1, NULL);
 1122                 DELAY(1000);    /* XXX */
 1123         }
 1124 }
 1125 #endif /* INET6 */
 1126 
 1127 static int
 1128 carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type)
 1129 {
 1130         struct carp_softc *vh;
 1131         struct ifaddr *ifa;
 1132         int count = 0;
 1133 
 1134         CARP_LOCK_ASSERT(cif);
 1135 
 1136         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
 1137                 if ((type == CARP_COUNT_RUNNING &&
 1138                     (SC2IFP(vh)->if_flags & IFF_UP) &&
 1139                     (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING)) ||
 1140                     (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) {
 1141                         IF_ADDR_RLOCK(SC2IFP(vh));
 1142                         TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist,
 1143                             ifa_list) {
 1144                                 if (ifa->ifa_addr->sa_family == AF_INET &&
 1145                                     ia->ia_addr.sin_addr.s_addr ==
 1146                                     ifatoia(ifa)->ia_addr.sin_addr.s_addr)
 1147                                         count++;
 1148                         }
 1149                         IF_ADDR_RUNLOCK(SC2IFP(vh));
 1150                 }
 1151         }
 1152         return (count);
 1153 }
 1154 
 1155 int
 1156 carp_iamatch(struct ifnet *ifp, struct in_ifaddr *ia,
 1157     struct in_addr *isaddr, u_int8_t **enaddr)
 1158 {
 1159         struct carp_if *cif;
 1160         struct carp_softc *vh;
 1161         int index, count = 0;
 1162         struct ifaddr *ifa;
 1163 
 1164         cif = ifp->if_carp;
 1165         CARP_LOCK(cif);
 1166 
 1167         if (carp_opts[CARPCTL_ARPBALANCE]) {
 1168                 /*
 1169                  * XXX proof of concept implementation.
 1170                  * We use the source ip to decide which virtual host should
 1171                  * handle the request. If we're master of that virtual host,
 1172                  * then we respond, otherwise, just drop the arp packet on
 1173                  * the floor.
 1174                  */
 1175                 count = carp_addrcount(cif, ia, CARP_COUNT_RUNNING);
 1176                 if (count == 0) {
 1177                         /* should never reach this */
 1178                         CARP_UNLOCK(cif);
 1179                         return (0);
 1180                 }
 1181 
 1182                 /* this should be a hash, like pf_hash() */
 1183                 index = ntohl(isaddr->s_addr) % count;
 1184                 count = 0;
 1185 
 1186                 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
 1187                         if ((SC2IFP(vh)->if_flags & IFF_UP) &&
 1188                             (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING)) {
 1189                                 IF_ADDR_RLOCK(SC2IFP(vh));
 1190                                 TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist,
 1191                                     ifa_list) {
 1192                                         if (ifa->ifa_addr->sa_family ==
 1193                                             AF_INET &&
 1194                                             ia->ia_addr.sin_addr.s_addr ==
 1195                                             ifatoia(ifa)->ia_addr.sin_addr.s_addr) {
 1196                                                 if (count == index) {
 1197                                                         if (vh->sc_state ==
 1198                                                             MASTER) {
 1199                                                                 *enaddr = IF_LLADDR(vh->sc_ifp);
 1200                                                                 IF_ADDR_RUNLOCK(SC2IFP(vh));
 1201                                                                 CARP_UNLOCK(cif);
 1202                                                                 return (1);
 1203                                                         } else {
 1204                                                                 IF_ADDR_RUNLOCK(SC2IFP(vh));
 1205                                                                 CARP_UNLOCK(cif);
 1206                                                                 return (0);
 1207                                                         }
 1208                                                 }
 1209                                                 count++;
 1210                                         }
 1211                                 }
 1212                                 IF_ADDR_RUNLOCK(SC2IFP(vh));
 1213                         }
 1214                 }
 1215         } else {
 1216                 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
 1217                         if ((SC2IFP(vh)->if_flags & IFF_UP) &&
 1218                             (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) &&
 1219                             ia->ia_ifp == SC2IFP(vh) &&
 1220                             vh->sc_state == MASTER) {
 1221                                 *enaddr = IF_LLADDR(vh->sc_ifp);
 1222                                 CARP_UNLOCK(cif);
 1223                                 return (1);
 1224                         }
 1225                 }
 1226         }
 1227         CARP_UNLOCK(cif);
 1228         return (0);
 1229 }
 1230 
 1231 #ifdef INET6
 1232 struct ifaddr *
 1233 carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr)
 1234 {
 1235         struct carp_if *cif;
 1236         struct carp_softc *vh;
 1237         struct ifaddr *ifa;
 1238 
 1239         cif = ifp->if_carp;
 1240         CARP_LOCK(cif);
 1241         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
 1242                 IF_ADDR_RLOCK(SC2IFP(vh));
 1243                 TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, ifa_list) {
 1244                         if (IN6_ARE_ADDR_EQUAL(taddr,
 1245                             &ifatoia6(ifa)->ia_addr.sin6_addr) &&
 1246                             (SC2IFP(vh)->if_flags & IFF_UP) &&
 1247                             (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) &&
 1248                             vh->sc_state == MASTER) {
 1249                                 ifa_ref(ifa);
 1250                                 IF_ADDR_RUNLOCK(SC2IFP(vh));
 1251                                 CARP_UNLOCK(cif);
 1252                                 return (ifa);
 1253                         }
 1254                 }
 1255                 IF_ADDR_RUNLOCK(SC2IFP(vh));
 1256         }
 1257         CARP_UNLOCK(cif);
 1258         
 1259         return (NULL);
 1260 }
 1261 
 1262 caddr_t
 1263 carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr)
 1264 {
 1265         struct m_tag *mtag;
 1266         struct carp_if *cif;
 1267         struct carp_softc *sc;
 1268         struct ifaddr *ifa;
 1269 
 1270         cif = ifp->if_carp;
 1271         CARP_LOCK(cif);
 1272         TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
 1273                 IF_ADDR_RLOCK(SC2IFP(sc));
 1274                 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
 1275                         if (IN6_ARE_ADDR_EQUAL(taddr,
 1276                             &ifatoia6(ifa)->ia_addr.sin6_addr) &&
 1277                             (SC2IFP(sc)->if_flags & IFF_UP) &&
 1278                             (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING)) {
 1279                                 struct ifnet *ifp = SC2IFP(sc);
 1280                                 mtag = m_tag_get(PACKET_TAG_CARP,
 1281                                     sizeof(struct ifnet *), M_NOWAIT);
 1282                                 if (mtag == NULL) {
 1283                                         /* better a bit than nothing */
 1284                                         IF_ADDR_RUNLOCK(SC2IFP(sc));
 1285                                         CARP_UNLOCK(cif);
 1286                                         return (IF_LLADDR(sc->sc_ifp));
 1287                                 }
 1288                                 bcopy(&ifp, (caddr_t)(mtag + 1),
 1289                                     sizeof(struct ifnet *));
 1290                                 m_tag_prepend(m, mtag);
 1291 
 1292                                 IF_ADDR_RUNLOCK(SC2IFP(sc));
 1293                                 CARP_UNLOCK(cif);
 1294                                 return (IF_LLADDR(sc->sc_ifp));
 1295                         }
 1296                 }
 1297                 IF_ADDR_RUNLOCK(SC2IFP(sc));
 1298         }
 1299         CARP_UNLOCK(cif);
 1300 
 1301         return (NULL);
 1302 }
 1303 #endif
 1304 
 1305 struct ifnet *
 1306 carp_forus(struct ifnet *ifp, u_char *dhost)
 1307 {
 1308         struct carp_if *cif;
 1309         struct carp_softc *vh;
 1310         u_int8_t *ena = dhost;
 1311 
 1312         if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
 1313                 return (NULL);
 1314 
 1315         cif = ifp->if_carp;
 1316         CARP_LOCK(cif);
 1317         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list)
 1318                 if ((SC2IFP(vh)->if_flags & IFF_UP) &&
 1319                     (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) &&
 1320                     vh->sc_state == MASTER &&
 1321                     !bcmp(dhost, IF_LLADDR(vh->sc_ifp), ETHER_ADDR_LEN)) {
 1322                         CARP_UNLOCK(cif);
 1323                         return (SC2IFP(vh));
 1324                 }
 1325 
 1326         CARP_UNLOCK(cif);
 1327         return (NULL);
 1328 }
 1329 
 1330 static void
 1331 carp_master_down(void *v)
 1332 {
 1333         struct carp_softc *sc = v;
 1334 
 1335         CARP_SCLOCK(sc);
 1336         carp_master_down_locked(sc);
 1337         CARP_SCUNLOCK(sc);
 1338 }
 1339 
 1340 static void
 1341 carp_master_down_locked(struct carp_softc *sc)
 1342 {
 1343         if (sc->sc_carpdev)
 1344                 CARP_SCLOCK_ASSERT(sc);
 1345 
 1346         switch (sc->sc_state) {
 1347         case INIT:
 1348                 printf("%s: master_down event in INIT state\n",
 1349                     SC2IFP(sc)->if_xname);
 1350                 break;
 1351         case MASTER:
 1352                 break;
 1353         case BACKUP:
 1354                 carp_set_state(sc, MASTER);
 1355                 carp_send_ad_locked(sc);
 1356                 carp_send_arp(sc);
 1357 #ifdef INET6
 1358                 carp_send_na(sc);
 1359 #endif /* INET6 */
 1360                 carp_setrun(sc, 0);
 1361                 carp_setroute(sc, RTM_ADD);
 1362                 break;
 1363         }
 1364 }
 1365 
 1366 /*
 1367  * When in backup state, af indicates whether to reset the master down timer
 1368  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
 1369  */
 1370 static void
 1371 carp_setrun(struct carp_softc *sc, sa_family_t af)
 1372 {
 1373         struct timeval tv;
 1374 
 1375         if (sc->sc_carpdev == NULL) {
 1376                 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
 1377                 carp_set_state(sc, INIT);
 1378                 return;
 1379         } else
 1380                 CARP_SCLOCK_ASSERT(sc);
 1381 
 1382         if (SC2IFP(sc)->if_flags & IFF_UP &&
 1383             sc->sc_vhid > 0 && (sc->sc_naddrs || sc->sc_naddrs6) &&
 1384             sc->sc_carpdev->if_link_state == LINK_STATE_UP)
 1385                 SC2IFP(sc)->if_drv_flags |= IFF_DRV_RUNNING;
 1386         else {
 1387                 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
 1388                 carp_setroute(sc, RTM_DELETE);
 1389                 return;
 1390         }
 1391 
 1392         switch (sc->sc_state) {
 1393         case INIT:
 1394                 CARP_LOG("%s: INIT -> BACKUP\n", SC2IFP(sc)->if_xname);
 1395                 carp_set_state(sc, BACKUP);
 1396                 carp_setroute(sc, RTM_DELETE);
 1397                 carp_setrun(sc, 0);
 1398                 break;
 1399         case BACKUP:
 1400                 callout_stop(&sc->sc_ad_tmo);
 1401                 tv.tv_sec = 3 * sc->sc_advbase;
 1402                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
 1403                 switch (af) {
 1404 #ifdef INET
 1405                 case AF_INET:
 1406                         callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
 1407                             carp_master_down, sc);
 1408                         break;
 1409 #endif /* INET */
 1410 #ifdef INET6
 1411                 case AF_INET6:
 1412                         callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
 1413                             carp_master_down, sc);
 1414                         break;
 1415 #endif /* INET6 */
 1416                 default:
 1417                         if (sc->sc_naddrs)
 1418                                 callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
 1419                                     carp_master_down, sc);
 1420                         if (sc->sc_naddrs6)
 1421                                 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
 1422                                     carp_master_down, sc);
 1423                         break;
 1424                 }
 1425                 break;
 1426         case MASTER:
 1427                 tv.tv_sec = sc->sc_advbase;
 1428                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
 1429                 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
 1430                     carp_send_ad, sc);
 1431                 break;
 1432         }
 1433 }
 1434 
 1435 static void
 1436 carp_multicast_cleanup(struct carp_softc *sc, int dofree)
 1437 {
 1438         struct ip_moptions *imo = &sc->sc_imo;
 1439         u_int16_t n = imo->imo_num_memberships;
 1440 
 1441         /* Clean up our own multicast memberships */
 1442         while (n-- > 0) {
 1443                 if (imo->imo_membership[n] != NULL) {
 1444                         if (dofree)
 1445                                 in_delmulti(imo->imo_membership[n]);
 1446                         imo->imo_membership[n] = NULL;
 1447                 }
 1448         }
 1449         KASSERT(imo->imo_mfilters == NULL,
 1450            ("%s: imo_mfilters != NULL", __func__));
 1451         imo->imo_num_memberships = 0;
 1452         imo->imo_multicast_ifp = NULL;
 1453 }
 1454 
 1455 #ifdef INET6
 1456 static void
 1457 carp_multicast6_cleanup(struct carp_softc *sc, int dofree)
 1458 {
 1459         struct ip6_moptions *im6o = &sc->sc_im6o;
 1460         u_int16_t n = im6o->im6o_num_memberships;
 1461 
 1462         while (n-- > 0) {
 1463                 if (im6o->im6o_membership[n] != NULL) {
 1464                         if (dofree)
 1465                                 in6_mc_leave(im6o->im6o_membership[n], NULL);
 1466                         im6o->im6o_membership[n] = NULL;
 1467                 }
 1468         }
 1469         KASSERT(im6o->im6o_mfilters == NULL,
 1470            ("%s: im6o_mfilters != NULL", __func__));
 1471         im6o->im6o_num_memberships = 0;
 1472         im6o->im6o_multicast_ifp = NULL;
 1473 }
 1474 #endif
 1475 
 1476 static int
 1477 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
 1478 {
 1479         struct ifnet *ifp;
 1480         struct carp_if *cif;
 1481         struct in_ifaddr *ia, *ia_if;
 1482         struct ip_moptions *imo = &sc->sc_imo;
 1483         struct in_addr addr;
 1484         u_long iaddr = htonl(sin->sin_addr.s_addr);
 1485         int own, error;
 1486 
 1487         if (sin->sin_addr.s_addr == 0) {
 1488                 if (!(SC2IFP(sc)->if_flags & IFF_UP))
 1489                         carp_set_state(sc, INIT);
 1490                 if (sc->sc_naddrs)
 1491                         SC2IFP(sc)->if_flags |= IFF_UP;
 1492                 if (sc->sc_carpdev)
 1493                         CARP_SCLOCK(sc);
 1494                 carp_setrun(sc, 0);
 1495                 if (sc->sc_carpdev)
 1496                         CARP_SCUNLOCK(sc);
 1497                 return (0);
 1498         }
 1499 
 1500         /* we have to do it by hands to check we won't match on us */
 1501         ia_if = NULL; own = 0;
 1502         IN_IFADDR_RLOCK();
 1503         TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
 1504                 /* and, yeah, we need a multicast-capable iface too */
 1505                 if (ia->ia_ifp != SC2IFP(sc) &&
 1506                     (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
 1507                     (iaddr & ia->ia_subnetmask) == ia->ia_subnet) {
 1508                         if (!ia_if)
 1509                                 ia_if = ia;
 1510                         if (sin->sin_addr.s_addr ==
 1511                             ia->ia_addr.sin_addr.s_addr)
 1512                                 own++;
 1513                 }
 1514         }
 1515 
 1516         if (!ia_if) {
 1517                 IN_IFADDR_RUNLOCK();
 1518                 return (EADDRNOTAVAIL);
 1519         }
 1520 
 1521         ia = ia_if;
 1522         ifa_ref(&ia->ia_ifa);
 1523         IN_IFADDR_RUNLOCK();
 1524 
 1525         ifp = ia->ia_ifp;
 1526 
 1527         if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
 1528             (imo->imo_multicast_ifp && imo->imo_multicast_ifp != ifp)) {
 1529                 ifa_free(&ia->ia_ifa);
 1530                 return (EADDRNOTAVAIL);
 1531         }
 1532 
 1533         if (imo->imo_num_memberships == 0) {
 1534                 addr.s_addr = htonl(INADDR_CARP_GROUP);
 1535                 if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) ==
 1536                     NULL) {
 1537                         ifa_free(&ia->ia_ifa);
 1538                         return (ENOBUFS);
 1539                 }
 1540                 imo->imo_num_memberships++;
 1541                 imo->imo_multicast_ifp = ifp;
 1542                 imo->imo_multicast_ttl = CARP_DFLTTL;
 1543                 imo->imo_multicast_loop = 0;
 1544         }
 1545 
 1546         if (!ifp->if_carp) {
 1547 
 1548                 cif = malloc(sizeof(*cif), M_CARP,
 1549                     M_WAITOK|M_ZERO);
 1550                 if (!cif) {
 1551                         error = ENOBUFS;
 1552                         goto cleanup;
 1553                 }
 1554                 if ((error = ifpromisc(ifp, 1))) {
 1555                         free(cif, M_CARP);
 1556                         goto cleanup;
 1557                 }
 1558                 
 1559                 CARP_LOCK_INIT(cif);
 1560                 CARP_LOCK(cif);
 1561                 cif->vhif_ifp = ifp;
 1562                 TAILQ_INIT(&cif->vhif_vrs);
 1563                 ifp->if_carp = cif;
 1564 
 1565         } else {
 1566                 struct carp_softc *vr;
 1567 
 1568                 cif = (struct carp_if *)ifp->if_carp;
 1569                 CARP_LOCK(cif);
 1570                 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
 1571                         if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
 1572                                 CARP_UNLOCK(cif);
 1573                                 error = EEXIST;
 1574                                 goto cleanup;
 1575                         }
 1576         }
 1577         sc->sc_ia = ia;
 1578         sc->sc_carpdev = ifp;
 1579 
 1580         { /* XXX prevent endless loop if already in queue */
 1581         struct carp_softc *vr, *after = NULL;
 1582         int myself = 0;
 1583         cif = (struct carp_if *)ifp->if_carp;
 1584 
 1585         /* XXX: cif should not change, right? So we still hold the lock */
 1586         CARP_LOCK_ASSERT(cif);
 1587 
 1588         TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
 1589                 if (vr == sc)
 1590                         myself = 1;
 1591                 if (vr->sc_vhid < sc->sc_vhid)
 1592                         after = vr;
 1593         }
 1594 
 1595         if (!myself) {
 1596                 /* We're trying to keep things in order */
 1597                 if (after == NULL) {
 1598                         TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
 1599                 } else {
 1600                         TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
 1601                 }
 1602                 cif->vhif_nvrs++;
 1603         }
 1604         }
 1605 
 1606         sc->sc_naddrs++;
 1607         SC2IFP(sc)->if_flags |= IFF_UP;
 1608         if (own)
 1609                 sc->sc_advskew = 0;
 1610         carp_sc_state_locked(sc);
 1611         carp_setrun(sc, 0);
 1612 
 1613         CARP_UNLOCK(cif);
 1614         ifa_free(&ia->ia_ifa);  /* XXXRW: should hold reference for softc. */
 1615 
 1616         return (0);
 1617 
 1618 cleanup:
 1619         in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
 1620         ifa_free(&ia->ia_ifa);
 1621         return (error);
 1622 }
 1623 
 1624 static int
 1625 carp_del_addr(struct carp_softc *sc, struct sockaddr_in *sin)
 1626 {
 1627         int error = 0;
 1628 
 1629         if (!--sc->sc_naddrs) {
 1630                 struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp;
 1631                 struct ip_moptions *imo = &sc->sc_imo;
 1632 
 1633                 CARP_LOCK(cif);
 1634                 callout_stop(&sc->sc_ad_tmo);
 1635                 SC2IFP(sc)->if_flags &= ~IFF_UP;
 1636                 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
 1637                 sc->sc_vhid = -1;
 1638                 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
 1639                 imo->imo_multicast_ifp = NULL;
 1640                 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
 1641                 if (!--cif->vhif_nvrs) {
 1642                         sc->sc_carpdev->if_carp = NULL;
 1643                         CARP_LOCK_DESTROY(cif);
 1644                         free(cif, M_CARP);
 1645                 } else {
 1646                         CARP_UNLOCK(cif);
 1647                 }
 1648         }
 1649 
 1650         return (error);
 1651 }
 1652 
 1653 #ifdef INET6
 1654 static int
 1655 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
 1656 {
 1657         struct ifnet *ifp;
 1658         struct carp_if *cif;
 1659         struct in6_ifaddr *ia, *ia_if;
 1660         struct ip6_moptions *im6o = &sc->sc_im6o;
 1661         struct in6_addr in6;
 1662         int own, error;
 1663 
 1664         error = 0;
 1665 
 1666         if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 1667                 if (!(SC2IFP(sc)->if_flags & IFF_UP))
 1668                         carp_set_state(sc, INIT);
 1669                 if (sc->sc_naddrs6)
 1670                         SC2IFP(sc)->if_flags |= IFF_UP;
 1671                 if (sc->sc_carpdev)
 1672                         CARP_SCLOCK(sc);
 1673                 carp_setrun(sc, 0);
 1674                 if (sc->sc_carpdev)
 1675                         CARP_SCUNLOCK(sc);
 1676                 return (0);
 1677         }
 1678 
 1679         /* we have to do it by hands to check we won't match on us */
 1680         ia_if = NULL; own = 0;
 1681         IN6_IFADDR_RLOCK();
 1682         TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
 1683                 int i;
 1684 
 1685                 for (i = 0; i < 4; i++) {
 1686                         if ((sin6->sin6_addr.s6_addr32[i] &
 1687                             ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
 1688                             (ia->ia_addr.sin6_addr.s6_addr32[i] &
 1689                             ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
 1690                                 break;
 1691                 }
 1692                 /* and, yeah, we need a multicast-capable iface too */
 1693                 if (ia->ia_ifp != SC2IFP(sc) &&
 1694                     (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
 1695                     (i == 4)) {
 1696                         if (!ia_if)
 1697                                 ia_if = ia;
 1698                         if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
 1699                             &ia->ia_addr.sin6_addr))
 1700                                 own++;
 1701                 }
 1702         }
 1703 
 1704         if (!ia_if) {
 1705                 IN6_IFADDR_RUNLOCK();
 1706                 return (EADDRNOTAVAIL);
 1707         }
 1708         ia = ia_if;
 1709         ifa_ref(&ia->ia_ifa);
 1710         IN6_IFADDR_RUNLOCK();
 1711         ifp = ia->ia_ifp;
 1712 
 1713         if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
 1714             (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp)) {
 1715                 ifa_free(&ia->ia_ifa);
 1716                 return (EADDRNOTAVAIL);
 1717         }
 1718 
 1719         if (!sc->sc_naddrs6) {
 1720                 struct in6_multi *in6m;
 1721 
 1722                 im6o->im6o_multicast_ifp = ifp;
 1723 
 1724                 /* join CARP multicast address */
 1725                 bzero(&in6, sizeof(in6));
 1726                 in6.s6_addr16[0] = htons(0xff02);
 1727                 in6.s6_addr8[15] = 0x12;
 1728                 if (in6_setscope(&in6, ifp, NULL) != 0)
 1729                         goto cleanup;
 1730                 in6m = NULL;
 1731                 error = in6_mc_join(ifp, &in6, NULL, &in6m, 0);
 1732                 if (error)
 1733                         goto cleanup;
 1734                 im6o->im6o_membership[0] = in6m;
 1735                 im6o->im6o_num_memberships++;
 1736 
 1737                 /* join solicited multicast address */
 1738                 bzero(&in6, sizeof(in6));
 1739                 in6.s6_addr16[0] = htons(0xff02);
 1740                 in6.s6_addr32[1] = 0;
 1741                 in6.s6_addr32[2] = htonl(1);
 1742                 in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3];
 1743                 in6.s6_addr8[12] = 0xff;
 1744                 if (in6_setscope(&in6, ifp, NULL) != 0)
 1745                         goto cleanup;
 1746                 in6m = NULL;
 1747                 error = in6_mc_join(ifp, &in6, NULL, &in6m, 0);
 1748                 if (error)
 1749                         goto cleanup;
 1750                 im6o->im6o_membership[1] = in6m;
 1751                 im6o->im6o_num_memberships++;
 1752         }
 1753 
 1754         if (!ifp->if_carp) {
 1755                 cif = malloc(sizeof(*cif), M_CARP,
 1756                     M_WAITOK|M_ZERO);
 1757                 if (!cif) {
 1758                         error = ENOBUFS;
 1759                         goto cleanup;
 1760                 }
 1761                 if ((error = ifpromisc(ifp, 1))) {
 1762                         free(cif, M_CARP);
 1763                         goto cleanup;
 1764                 }
 1765 
 1766                 CARP_LOCK_INIT(cif);
 1767                 CARP_LOCK(cif);
 1768                 cif->vhif_ifp = ifp;
 1769                 TAILQ_INIT(&cif->vhif_vrs);
 1770                 ifp->if_carp = cif;
 1771 
 1772         } else {
 1773                 struct carp_softc *vr;
 1774 
 1775                 cif = (struct carp_if *)ifp->if_carp;
 1776                 CARP_LOCK(cif);
 1777                 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
 1778                         if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
 1779                                 CARP_UNLOCK(cif);
 1780                                 error = EINVAL;
 1781                                 goto cleanup;
 1782                         }
 1783         }
 1784         sc->sc_ia6 = ia;
 1785         sc->sc_carpdev = ifp;
 1786 
 1787         { /* XXX prevent endless loop if already in queue */
 1788         struct carp_softc *vr, *after = NULL;
 1789         int myself = 0;
 1790         cif = (struct carp_if *)ifp->if_carp;
 1791         CARP_LOCK_ASSERT(cif);
 1792 
 1793         TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
 1794                 if (vr == sc)
 1795                         myself = 1;
 1796                 if (vr->sc_vhid < sc->sc_vhid)
 1797                         after = vr;
 1798         }
 1799 
 1800         if (!myself) {
 1801                 /* We're trying to keep things in order */
 1802                 if (after == NULL) {
 1803                         TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
 1804                 } else {
 1805                         TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
 1806                 }
 1807                 cif->vhif_nvrs++;
 1808         }
 1809         }
 1810 
 1811         sc->sc_naddrs6++;
 1812         SC2IFP(sc)->if_flags |= IFF_UP;
 1813         if (own)
 1814                 sc->sc_advskew = 0;
 1815         carp_sc_state_locked(sc);
 1816         carp_setrun(sc, 0);
 1817 
 1818         CARP_UNLOCK(cif);
 1819         ifa_free(&ia->ia_ifa);  /* XXXRW: should hold reference for softc. */
 1820 
 1821         return (0);
 1822 
 1823 cleanup:
 1824         if (!sc->sc_naddrs6)
 1825                 carp_multicast6_cleanup(sc, 1);
 1826         ifa_free(&ia->ia_ifa);
 1827         return (error);
 1828 }
 1829 
 1830 static int
 1831 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
 1832 {
 1833         int error = 0;
 1834 
 1835         if (!--sc->sc_naddrs6) {
 1836                 struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp;
 1837 
 1838                 CARP_LOCK(cif);
 1839                 callout_stop(&sc->sc_ad_tmo);
 1840                 SC2IFP(sc)->if_flags &= ~IFF_UP;
 1841                 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
 1842                 sc->sc_vhid = -1;
 1843                 carp_multicast6_cleanup(sc, 1);
 1844                 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
 1845                 if (!--cif->vhif_nvrs) {
 1846                         CARP_LOCK_DESTROY(cif);
 1847                         sc->sc_carpdev->if_carp = NULL;
 1848                         free(cif, M_CARP);
 1849                 } else
 1850                         CARP_UNLOCK(cif);
 1851         }
 1852 
 1853         return (error);
 1854 }
 1855 #endif /* INET6 */
 1856 
 1857 static int
 1858 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
 1859 {
 1860         struct carp_softc *sc = ifp->if_softc, *vr;
 1861         struct carpreq carpr;
 1862         struct ifaddr *ifa;
 1863         struct ifreq *ifr;
 1864         struct ifaliasreq *ifra;
 1865         int locked = 0, error = 0;
 1866 
 1867         ifa = (struct ifaddr *)addr;
 1868         ifra = (struct ifaliasreq *)addr;
 1869         ifr = (struct ifreq *)addr;
 1870 
 1871         switch (cmd) {
 1872         case SIOCSIFADDR:
 1873                 switch (ifa->ifa_addr->sa_family) {
 1874 #ifdef INET
 1875                 case AF_INET:
 1876                         SC2IFP(sc)->if_flags |= IFF_UP;
 1877                         bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
 1878                             sizeof(struct sockaddr));
 1879                         error = carp_set_addr(sc, satosin(ifa->ifa_addr));
 1880                         break;
 1881 #endif /* INET */
 1882 #ifdef INET6
 1883                 case AF_INET6:
 1884                         SC2IFP(sc)->if_flags |= IFF_UP;
 1885                         error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
 1886                         break;
 1887 #endif /* INET6 */
 1888                 default:
 1889                         error = EAFNOSUPPORT;
 1890                         break;
 1891                 }
 1892                 break;
 1893 
 1894         case SIOCAIFADDR:
 1895                 switch (ifa->ifa_addr->sa_family) {
 1896 #ifdef INET
 1897                 case AF_INET:
 1898                         SC2IFP(sc)->if_flags |= IFF_UP;
 1899                         bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
 1900                             sizeof(struct sockaddr));
 1901                         error = carp_set_addr(sc, satosin(&ifra->ifra_addr));
 1902                         break;
 1903 #endif /* INET */
 1904 #ifdef INET6
 1905                 case AF_INET6:
 1906                         SC2IFP(sc)->if_flags |= IFF_UP;
 1907                         error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr));
 1908                         break;
 1909 #endif /* INET6 */
 1910                 default:
 1911                         error = EAFNOSUPPORT;
 1912                         break;
 1913                 }
 1914                 break;
 1915 
 1916         case SIOCDIFADDR:
 1917                 switch (ifa->ifa_addr->sa_family) {
 1918 #ifdef INET
 1919                 case AF_INET:
 1920                         error = carp_del_addr(sc, satosin(&ifra->ifra_addr));
 1921                         break;
 1922 #endif /* INET */
 1923 #ifdef INET6
 1924                 case AF_INET6:
 1925                         error = carp_del_addr6(sc, satosin6(&ifra->ifra_addr));
 1926                         break;
 1927 #endif /* INET6 */
 1928                 default:
 1929                         error = EAFNOSUPPORT;
 1930                         break;
 1931                 }
 1932                 break;
 1933 
 1934         case SIOCSIFFLAGS:
 1935                 if (sc->sc_carpdev) {
 1936                         locked = 1;
 1937                         CARP_SCLOCK(sc);
 1938                 }
 1939                 if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) {
 1940                         callout_stop(&sc->sc_ad_tmo);
 1941                         callout_stop(&sc->sc_md_tmo);
 1942                         callout_stop(&sc->sc_md6_tmo);
 1943                         if (sc->sc_state == MASTER)
 1944                                 carp_send_ad_locked(sc);
 1945                         carp_set_state(sc, INIT);
 1946                         carp_setrun(sc, 0);
 1947                 } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) {
 1948                         SC2IFP(sc)->if_flags |= IFF_UP;
 1949                         carp_setrun(sc, 0);
 1950                 }
 1951                 break;
 1952 
 1953         case SIOCSVH:
 1954                 error = priv_check(curthread, PRIV_NETINET_CARP);
 1955                 if (error)
 1956                         break;
 1957                 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
 1958                         break;
 1959                 error = 1;
 1960                 if (sc->sc_carpdev) {
 1961                         locked = 1;
 1962                         CARP_SCLOCK(sc);
 1963                 }
 1964                 if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) {
 1965                         switch (carpr.carpr_state) {
 1966                         case BACKUP:
 1967                                 callout_stop(&sc->sc_ad_tmo);
 1968                                 carp_set_state(sc, BACKUP);
 1969                                 carp_setrun(sc, 0);
 1970                                 carp_setroute(sc, RTM_DELETE);
 1971                                 break;
 1972                         case MASTER:
 1973                                 carp_master_down_locked(sc);
 1974                                 break;
 1975                         default:
 1976                                 break;
 1977                         }
 1978                 }
 1979                 if (carpr.carpr_vhid > 0) {
 1980                         if (carpr.carpr_vhid > 255) {
 1981                                 error = EINVAL;
 1982                                 break;
 1983                         }
 1984                         if (sc->sc_carpdev) {
 1985                                 struct carp_if *cif;
 1986                                 cif = (struct carp_if *)sc->sc_carpdev->if_carp;
 1987                                 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
 1988                                         if (vr != sc &&
 1989                                             vr->sc_vhid == carpr.carpr_vhid) {
 1990                                                 error = EEXIST;
 1991                                                 break;
 1992                                         }
 1993                                 if (error == EEXIST)
 1994                                         break;
 1995                         }
 1996                         sc->sc_vhid = carpr.carpr_vhid;
 1997                         IF_LLADDR(sc->sc_ifp)[0] = 0;
 1998                         IF_LLADDR(sc->sc_ifp)[1] = 0;
 1999                         IF_LLADDR(sc->sc_ifp)[2] = 0x5e;
 2000                         IF_LLADDR(sc->sc_ifp)[3] = 0;
 2001                         IF_LLADDR(sc->sc_ifp)[4] = 1;
 2002                         IF_LLADDR(sc->sc_ifp)[5] = sc->sc_vhid;
 2003                         error--;
 2004                 }
 2005                 if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) {
 2006                         if (carpr.carpr_advskew >= 255) {
 2007                                 error = EINVAL;
 2008                                 break;
 2009                         }
 2010                         if (carpr.carpr_advbase > 255) {
 2011                                 error = EINVAL;
 2012                                 break;
 2013                         }
 2014                         sc->sc_advbase = carpr.carpr_advbase;
 2015                         sc->sc_advskew = carpr.carpr_advskew;
 2016                         error--;
 2017                 }
 2018                 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
 2019                 if (error > 0)
 2020                         error = EINVAL;
 2021                 else {
 2022                         error = 0;
 2023                         carp_setrun(sc, 0);
 2024                 }
 2025                 break;
 2026 
 2027         case SIOCGVH:
 2028                 /* XXX: lockless read */
 2029                 bzero(&carpr, sizeof(carpr));
 2030                 carpr.carpr_state = sc->sc_state;
 2031                 carpr.carpr_vhid = sc->sc_vhid;
 2032                 carpr.carpr_advbase = sc->sc_advbase;
 2033                 carpr.carpr_advskew = sc->sc_advskew;
 2034                 error = priv_check(curthread, PRIV_NETINET_CARP);
 2035                 if (error == 0)
 2036                         bcopy(sc->sc_key, carpr.carpr_key,
 2037                             sizeof(carpr.carpr_key));
 2038                 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
 2039                 break;
 2040 
 2041         default:
 2042                 error = EINVAL;
 2043         }
 2044 
 2045         if (locked)
 2046                 CARP_SCUNLOCK(sc);
 2047 
 2048         carp_hmac_prepare(sc);
 2049 
 2050         return (error);
 2051 }
 2052 
 2053 /*
 2054  * XXX: this is looutput. We should eventually use it from there.
 2055  */
 2056 static int
 2057 carp_looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
 2058     struct route *ro)
 2059 {
 2060         u_int32_t af;
 2061         struct rtentry *rt = NULL;
 2062 
 2063         M_ASSERTPKTHDR(m); /* check if we have the packet header */
 2064 
 2065         if (ro != NULL)
 2066                 rt = ro->ro_rt;
 2067         if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
 2068                 m_freem(m);
 2069                 return (rt->rt_flags & RTF_BLACKHOLE ? 0 :
 2070                         rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
 2071         }
 2072 
 2073         ifp->if_opackets++;
 2074         ifp->if_obytes += m->m_pkthdr.len;
 2075 
 2076         /* BPF writes need to be handled specially. */
 2077         if (dst->sa_family == AF_UNSPEC) {
 2078                 bcopy(dst->sa_data, &af, sizeof(af));
 2079                 dst->sa_family = af;
 2080         }
 2081 
 2082 #if 1   /* XXX */
 2083         switch (dst->sa_family) {
 2084         case AF_INET:
 2085         case AF_INET6:
 2086         case AF_IPX:
 2087         case AF_APPLETALK:
 2088                 break;
 2089         default:
 2090                 printf("carp_looutput: af=%d unexpected\n", dst->sa_family);
 2091                 m_freem(m);
 2092                 return (EAFNOSUPPORT);
 2093         }
 2094 #endif
 2095         return(if_simloop(ifp, m, dst->sa_family, 0));
 2096 }
 2097 
 2098 /*
 2099  * Start output on carp interface. This function should never be called.
 2100  */
 2101 static void
 2102 carp_start(struct ifnet *ifp)
 2103 {
 2104 #ifdef DEBUG
 2105         printf("%s: start called\n", ifp->if_xname);
 2106 #endif
 2107 }
 2108 
 2109 int
 2110 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
 2111     struct rtentry *rt)
 2112 {
 2113         struct m_tag *mtag;
 2114         struct carp_softc *sc;
 2115         struct ifnet *carp_ifp;
 2116 
 2117         if (!sa)
 2118                 return (0);
 2119 
 2120         switch (sa->sa_family) {
 2121 #ifdef INET
 2122         case AF_INET:
 2123                 break;
 2124 #endif /* INET */
 2125 #ifdef INET6
 2126         case AF_INET6:
 2127                 break;
 2128 #endif /* INET6 */
 2129         default:
 2130                 return (0);
 2131         }
 2132 
 2133         mtag = m_tag_find(m, PACKET_TAG_CARP, NULL);
 2134         if (mtag == NULL)
 2135                 return (0);
 2136 
 2137         bcopy(mtag + 1, &carp_ifp, sizeof(struct ifnet *));
 2138         sc = carp_ifp->if_softc;
 2139 
 2140         /* Set the source MAC address to Virtual Router MAC Address */
 2141         switch (ifp->if_type) {
 2142         case IFT_ETHER:
 2143         case IFT_L2VLAN: {
 2144                         struct ether_header *eh;
 2145 
 2146                         eh = mtod(m, struct ether_header *);
 2147                         eh->ether_shost[0] = 0;
 2148                         eh->ether_shost[1] = 0;
 2149                         eh->ether_shost[2] = 0x5e;
 2150                         eh->ether_shost[3] = 0;
 2151                         eh->ether_shost[4] = 1;
 2152                         eh->ether_shost[5] = sc->sc_vhid;
 2153                 }
 2154                 break;
 2155         case IFT_FDDI: {
 2156                         struct fddi_header *fh;
 2157 
 2158                         fh = mtod(m, struct fddi_header *);
 2159                         fh->fddi_shost[0] = 0;
 2160                         fh->fddi_shost[1] = 0;
 2161                         fh->fddi_shost[2] = 0x5e;
 2162                         fh->fddi_shost[3] = 0;
 2163                         fh->fddi_shost[4] = 1;
 2164                         fh->fddi_shost[5] = sc->sc_vhid;
 2165                 }
 2166                 break;
 2167         case IFT_ISO88025: {
 2168                         struct iso88025_header *th;
 2169                         th = mtod(m, struct iso88025_header *);
 2170                         th->iso88025_shost[0] = 3;
 2171                         th->iso88025_shost[1] = 0;
 2172                         th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1);
 2173                         th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1);
 2174                         th->iso88025_shost[4] = 0;
 2175                         th->iso88025_shost[5] = 0;
 2176                 }
 2177                 break;
 2178         default:
 2179                 printf("%s: carp is not supported for this interface type\n",
 2180                     ifp->if_xname);
 2181                 return (EOPNOTSUPP);
 2182         }
 2183 
 2184         return (0);
 2185 }
 2186 
 2187 static void
 2188 carp_set_state(struct carp_softc *sc, int state)
 2189 {
 2190         int link_state;
 2191 
 2192         if (sc->sc_carpdev)
 2193                 CARP_SCLOCK_ASSERT(sc);
 2194 
 2195         if (sc->sc_state == state)
 2196                 return;
 2197 
 2198         sc->sc_state = state;
 2199         switch (state) {
 2200         case BACKUP:
 2201                 link_state = LINK_STATE_DOWN;
 2202                 break;
 2203         case MASTER:
 2204                 link_state = LINK_STATE_UP;
 2205                 break;
 2206         default:
 2207                 link_state = LINK_STATE_UNKNOWN;
 2208                 break;
 2209         }
 2210         if_link_state_change(SC2IFP(sc), link_state);
 2211 }
 2212 
 2213 void
 2214 carp_carpdev_state(struct ifnet *ifp)
 2215 {
 2216         struct carp_if *cif;
 2217 
 2218         cif = ifp->if_carp;
 2219         CARP_LOCK(cif);
 2220         carp_carpdev_state_locked(cif);
 2221         CARP_UNLOCK(cif);
 2222 }
 2223 
 2224 static void
 2225 carp_carpdev_state_locked(struct carp_if *cif)
 2226 {
 2227         struct carp_softc *sc;
 2228 
 2229         TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list)
 2230                 carp_sc_state_locked(sc);
 2231 }
 2232 
 2233 static void
 2234 carp_sc_state_locked(struct carp_softc *sc)
 2235 {
 2236         CARP_SCLOCK_ASSERT(sc);
 2237 
 2238         if (sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
 2239             !(sc->sc_carpdev->if_flags & IFF_UP)) {
 2240                 sc->sc_flags_backup = SC2IFP(sc)->if_flags;
 2241                 SC2IFP(sc)->if_flags &= ~IFF_UP;
 2242                 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
 2243                 callout_stop(&sc->sc_ad_tmo);
 2244                 callout_stop(&sc->sc_md_tmo);
 2245                 callout_stop(&sc->sc_md6_tmo);
 2246                 carp_set_state(sc, INIT);
 2247                 carp_setrun(sc, 0);
 2248                 if (!sc->sc_suppress) {
 2249                         carp_suppress_preempt++;
 2250                         if (carp_suppress_preempt == 1) {
 2251                                 CARP_SCUNLOCK(sc);
 2252                                 carp_send_ad_all();
 2253                                 CARP_SCLOCK(sc);
 2254                         }
 2255                 }
 2256                 sc->sc_suppress = 1;
 2257         } else {
 2258                 SC2IFP(sc)->if_flags |= sc->sc_flags_backup;
 2259                 carp_set_state(sc, INIT);
 2260                 carp_setrun(sc, 0);
 2261                 if (sc->sc_suppress)
 2262                         carp_suppress_preempt--;
 2263                 sc->sc_suppress = 0;
 2264         }
 2265 
 2266         return;
 2267 }
 2268 
 2269 #ifdef INET
 2270 extern  struct domain inetdomain;
 2271 static struct protosw in_carp_protosw = {
 2272         .pr_type =              SOCK_RAW,
 2273         .pr_domain =            &inetdomain,
 2274         .pr_protocol =          IPPROTO_CARP,
 2275         .pr_flags =             PR_ATOMIC|PR_ADDR,
 2276         .pr_input =             carp_input,
 2277         .pr_output =            (pr_output_t *)rip_output,
 2278         .pr_ctloutput =         rip_ctloutput,
 2279         .pr_usrreqs =           &rip_usrreqs
 2280 };
 2281 #endif
 2282 
 2283 #ifdef INET6
 2284 extern  struct domain inet6domain;
 2285 static struct ip6protosw in6_carp_protosw = {
 2286         .pr_type =              SOCK_RAW,
 2287         .pr_domain =            &inet6domain,
 2288         .pr_protocol =          IPPROTO_CARP,
 2289         .pr_flags =             PR_ATOMIC|PR_ADDR,
 2290         .pr_input =             carp6_input,
 2291         .pr_output =            rip6_output,
 2292         .pr_ctloutput =         rip6_ctloutput,
 2293         .pr_usrreqs =           &rip6_usrreqs
 2294 };
 2295 #endif
 2296 
 2297 static void
 2298 carp_mod_cleanup(void)
 2299 {
 2300 
 2301         if (if_detach_event_tag == NULL)
 2302                 return;
 2303         EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag);
 2304         if_clone_detach(&carp_cloner);
 2305 #ifdef INET
 2306         if (proto_reg[CARP_INET] == 0) {
 2307                 (void)ipproto_unregister(IPPROTO_CARP);
 2308                 pf_proto_unregister(PF_INET, IPPROTO_CARP, SOCK_RAW);
 2309                 proto_reg[CARP_INET] = -1;
 2310         }
 2311         carp_iamatch_p = NULL;
 2312 #endif
 2313 #ifdef INET6
 2314         if (proto_reg[CARP_INET6] == 0) {
 2315                 (void)ip6proto_unregister(IPPROTO_CARP);
 2316                 pf_proto_unregister(PF_INET6, IPPROTO_CARP, SOCK_RAW);
 2317                 proto_reg[CARP_INET6] = -1;
 2318         }
 2319         carp_iamatch6_p = NULL;
 2320         carp_macmatch6_p = NULL;
 2321 #endif
 2322         carp_linkstate_p = NULL;
 2323         carp_forus_p = NULL;
 2324         carp_output_p = NULL;
 2325         mtx_destroy(&carp_mtx);
 2326 }
 2327 
 2328 static int
 2329 carp_mod_load(void)
 2330 {
 2331         int err;
 2332 
 2333         if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
 2334                 carp_ifdetach, NULL, EVENTHANDLER_PRI_ANY);
 2335         if (if_detach_event_tag == NULL)
 2336                 return (ENOMEM);
 2337         mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF);
 2338         LIST_INIT(&carpif_list);
 2339         if_clone_attach(&carp_cloner);
 2340         carp_linkstate_p = carp_carpdev_state;
 2341         carp_forus_p = carp_forus;
 2342         carp_output_p = carp_output;
 2343 #ifdef INET6
 2344         carp_iamatch6_p = carp_iamatch6;
 2345         carp_macmatch6_p = carp_macmatch6;
 2346         proto_reg[CARP_INET6] = pf_proto_register(PF_INET6,
 2347             (struct protosw *)&in6_carp_protosw);
 2348         if (proto_reg[CARP_INET6] != 0) {
 2349                 printf("carp: error %d attaching to PF_INET6\n",
 2350                     proto_reg[CARP_INET6]);
 2351                 carp_mod_cleanup();
 2352                 return (EINVAL);
 2353         }
 2354         err = ip6proto_register(IPPROTO_CARP);
 2355         if (err) {
 2356                 printf("carp: error %d registering with INET6\n", err);
 2357                 carp_mod_cleanup();
 2358                 return (EINVAL);
 2359         }
 2360 #endif
 2361 #ifdef INET
 2362         carp_iamatch_p = carp_iamatch;
 2363         proto_reg[CARP_INET] = pf_proto_register(PF_INET, &in_carp_protosw);
 2364         if (proto_reg[CARP_INET] != 0) {
 2365                 printf("carp: error %d attaching to PF_INET\n",
 2366                     proto_reg[CARP_INET]);
 2367                 carp_mod_cleanup();
 2368                 return (EINVAL);
 2369         }
 2370         err = ipproto_register(IPPROTO_CARP);
 2371         if (err) {
 2372                 printf("carp: error %d registering with INET\n", err);
 2373                 carp_mod_cleanup();
 2374                 return (EINVAL);
 2375         }
 2376 #endif
 2377         return 0;
 2378 }
 2379 
 2380 static int
 2381 carp_modevent(module_t mod, int type, void *data)
 2382 {
 2383         switch (type) {
 2384         case MOD_LOAD:
 2385                 return carp_mod_load();
 2386                 /* NOTREACHED */
 2387         case MOD_UNLOAD:
 2388                 /*
 2389                  * XXX: For now, disallow module unloading by default due to
 2390                  * a race condition where a thread may dereference one of the
 2391                  * function pointer hooks after the module has been
 2392                  * unloaded, during processing of a packet, causing a panic.
 2393                  */
 2394 #ifdef CARPMOD_CAN_UNLOAD
 2395                 carp_mod_cleanup();
 2396 #else
 2397                 return (EBUSY);
 2398 #endif
 2399                 break;
 2400 
 2401         default:
 2402                 return (EINVAL);
 2403         }
 2404 
 2405         return (0);
 2406 }
 2407 
 2408 static moduledata_t carp_mod = {
 2409         "carp",
 2410         carp_modevent,
 2411         0
 2412 };
 2413 
 2414 DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY);

Cache object: 68b7e0c51db8ba2abc10675466b1200a


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.