The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/ip_carp.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $OpenBSD: ip_carp.c,v 1.355 2022/09/08 10:22:06 kn Exp $        */
    2 
    3 /*
    4  * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
    5  * Copyright (c) 2003 Ryan McBride. All rights reserved.
    6  * Copyright (c) 2006-2008 Marco Pfatschbacher. All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   20  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
   21  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
   22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
   23  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
   25  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
   26  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
   27  * THE POSSIBILITY OF SUCH DAMAGE.
   28  */
   29 
   30 /*
   31  * TODO:
   32  *      - iface reconfigure
   33  *      - support for hardware checksum calculations;
   34  *
   35  */
   36 
   37 #include "ether.h"
   38 
   39 #include <sys/param.h>
   40 #include <sys/systm.h>
   41 #include <sys/mbuf.h>
   42 #include <sys/socket.h>
   43 #include <sys/socketvar.h>
   44 #include <sys/timeout.h>
   45 #include <sys/ioctl.h>
   46 #include <sys/errno.h>
   47 #include <sys/device.h>
   48 #include <sys/kernel.h>
   49 #include <sys/sysctl.h>
   50 #include <sys/syslog.h>
   51 #include <sys/refcnt.h>
   52 
   53 #include <net/if.h>
   54 #include <net/if_var.h>
   55 #include <net/if_types.h>
   56 #include <net/netisr.h>
   57 
   58 #include <crypto/sha1.h>
   59 
   60 #include <netinet/in.h>
   61 #include <netinet/in_var.h>
   62 #include <netinet/ip.h>
   63 #include <netinet/ip_var.h>
   64 #include <netinet/if_ether.h>
   65 #include <netinet/ip_ipsp.h>
   66 
   67 #include <net/if_dl.h>
   68 
   69 #ifdef INET6
   70 #include <netinet6/in6_var.h>
   71 #include <netinet/icmp6.h>
   72 #include <netinet/ip6.h>
   73 #include <netinet6/ip6_var.h>
   74 #include <netinet6/nd6.h>
   75 #include <netinet6/in6_ifattach.h>
   76 #endif
   77 
   78 #include "bpfilter.h"
   79 #if NBPFILTER > 0
   80 #include <net/bpf.h>
   81 #endif
   82 
   83 #include "vlan.h"
   84 #if NVLAN > 0
   85 #include <net/if_vlan_var.h>
   86 #endif
   87 
   88 #include <netinet/ip_carp.h>
   89 
   90 struct carp_mc_entry {
              /*
               * One node of the carp_mc_listhead list embedded in
               * struct carp_softc.
               */
   91         LIST_ENTRY(carp_mc_entry)       mc_entries;
   92         union {
   93                 struct ether_multi      *mcu_enm;
   94         } mc_u;
              /* NOTE(review): presumably the multicast address this node tracks -- confirm */
   95         struct sockaddr_storage         mc_addr;
   96 };
   97 #define mc_enm  mc_u.mcu_enm   /* shorthand for the only union member */
   98 
   99 enum { HMAC_ORIG=0, HMAC_NOV6LL=1, HMAC_MAX=2 }; /* indexes into vhe_sha1[]; HMAC_MAX sizes that array */
      /*
       * HMAC_NOV6LL selects the context for which carp_hmac_prepare_ctx()
       * leaves scope-embedded IPv6 addresses out of the hash; for the other
       * context those addresses are hashed with their embedded scope cleared.
       */
  100 
  101 struct carp_vhost_entry {
              /*
               * Per-vhid state of a carp interface.  Entries are linked on
               * the carp_vhosts list of their parent struct carp_softc.
               */
  102         SRPL_ENTRY(carp_vhost_entry) vhost_entries;
  103         struct refcnt vhost_refcnt;
  104 
  105         struct carp_softc *parent_sc;   /* softc this entry belongs to */
  106         int vhe_leader;         /* set: this entry derives sc_hashkey in carp_hmac_prepare_ctx() */
  107         int vhid;               /* matched against carp_vhid of received packets */
  108         int advskew;            /* scaled as advskew * 1000000 / 256 usec on input */
  109         enum { INIT = 0, BACKUP, MASTER }       state;
  110         struct timeout ad_tmo;  /* advertisement timeout */
  111         struct timeout md_tmo;  /* master down timeout */
  112         struct timeout md6_tmo; /* master down timeout */
  113 
  114         u_int64_t vhe_replay_cookie;    /* compared with carp_counter of received packets */
  115 
  116         /* authentication */
  117 #define CARP_HMAC_PAD   64
  118         unsigned char vhe_pad[CARP_HMAC_PAD];   /* holds the opad once carp_hmac_prepare_ctx() returns */
  119         SHA1_CTX vhe_sha1[HMAC_MAX];            /* precomputed inner-hash state, one per HMAC_* context */
  120 
  121         u_int8_t vhe_enaddr[ETHER_ADDR_LEN];
  122 };
  123 
  124 void    carp_vh_ref(void *, void *);
  125 void    carp_vh_unref(void *, void *);
  126 
      /* SRPL reference hooks built from the two functions declared above. */
  127 struct srpl_rc carp_vh_rc =
  128     SRPL_RC_INITIALIZER(carp_vh_ref, carp_vh_unref, NULL);
  129 
  130 struct carp_softc {
              /*
               * Per-interface state of one carp(4) interface.  The embedded
               * arpcom carries the struct ifnet, reachable as sc_if.
               */
  131         struct arpcom sc_ac;
  132 #define sc_if           sc_ac.ac_if
  133 #define sc_carpdevidx   sc_ac.ac_if.if_carpdevidx
              /*
               * carp_clone_create() binds these tasks to carp_addr_updated,
               * carp_carpdev_state and carpdetach respectively.
               */
  134         struct task sc_atask;
  135         struct task sc_ltask;
  136         struct task sc_dtask;
  137         struct ip_moptions sc_imo;
  138 #ifdef INET6
  139         struct ip6_moptions sc_im6o;
  140 #endif /* INET6 */
  141 
  142         SRPL_ENTRY(carp_softc) sc_list; /* linkage on an interface's if_carp list */
  143         struct refcnt sc_refcnt;
  144 
  145         int sc_suppress;
  146         int sc_bow_out;
  147         int sc_demote_cnt;
  148 
  149         int sc_sendad_errors;
  150 #define CARP_SENDAD_MAX_ERRORS(sc) (3 * (sc)->sc_vhe_count)
  151         int sc_sendad_success;
  152 #define CARP_SENDAD_MIN_SUCCESS(sc) (3 * (sc)->sc_vhe_count)
  153 
  154         char sc_curlladdr[ETHER_ADDR_LEN];
  155 
  156         SRPL_HEAD(, carp_vhost_entry) carp_vhosts;      /* list of struct carp_vhost_entry */
  157         int sc_vhe_count;
  158         u_int8_t sc_vhids[CARP_MAXNODES];
  159         u_int8_t sc_advskews[CARP_MAXNODES];
  160         u_int8_t sc_balancing;
  161 
  162         int sc_naddrs;
  163         int sc_naddrs6;
  164         int sc_advbase;         /* seconds */
  165 
  166         /* authentication */
  167         unsigned char sc_key[CARP_KEY_LEN];     /* copied into vhe_pad to build the HMAC pads */
  168 
  169         u_int32_t sc_hashkey[2];        /* derived from the leader entry's partial hash */
  170         u_int32_t sc_lsmask;            /* load sharing mask */
  171         int sc_lscount;                 /* # load sharing interfaces (max 32) */
  172         int sc_delayed_arp;             /* delayed ARP request countdown */
  173         int sc_realmac;                 /* using real mac */
  174 
  175         struct in_addr sc_peer;         /* its multicast-ness must match that of received IPv4 packets */
  176 
  177         LIST_HEAD(__carp_mchead, carp_mc_entry) carp_mc_listhead;
  178         struct carp_vhost_entry *cur_vhe; /* current active vhe */
  179 };
  180 
  181 void    carp_sc_ref(void *, void *);
  182 void    carp_sc_unref(void *, void *);
  183 
      /* SRPL reference hooks built from the two functions declared above. */
  184 struct srpl_rc carp_sc_rc =
  185     SRPL_RC_INITIALIZER(carp_sc_ref, carp_sc_unref, NULL);
  186 
  187 int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, LOG_CRIT };   /* XXX for now */
      /*
       * carp_opts is indexed by the CARPCTL_* ids and read/written through
       * carp_sysctl().  NOTE(review): which initializer belongs to which id
       * depends on the CARPCTL_* numbering in ip_carp.h -- confirm there.
       */
  188 struct cpumem *carpcounters;    /* allocated in carpattach(), read by carp_sysctl_carpstat() */
  189 
  190 int     carp_send_all_recur = 0;
  191 
      /*
       * CARP_LOG(l, sc, s): emit a log line at level `l' when
       * carp_opts[CARPCTL_LOG] >= l.  The line is prefixed with the interface
       * name when `sc' is non-NULL and with "carp: " otherwise.  `s' is a
       * parenthesized addlog() argument list.  `l' and `sc' are expanded more
       * than once, so pass expressions without side effects.
       */
  192 #define CARP_LOG(l, sc, s)                                              \
  193         do {                                                            \
  194                 if (carp_opts[CARPCTL_LOG] >= l) {                      \
  195                         if (sc)                                         \
  196                                 log(l, "%s: ",                          \
  197                                     (sc)->sc_if.if_xname);              \
  198                         else                                            \
  199                                 log(l, "carp: ");                       \
  200                         addlog s;                                       \
  201                         addlog("\n");                                   \
  202                 }                                                       \
  203         } while (0)
  204 
  205 void    carp_hmac_prepare(struct carp_softc *);        /* runs carp_hmac_prepare_ctx() for every vhost entry and context */
  206 void    carp_hmac_prepare_ctx(struct carp_vhost_entry *, u_int8_t);
  207 void    carp_hmac_generate(struct carp_vhost_entry *, u_int32_t *,
  208             unsigned char *, u_int8_t);
  209 int     carp_hmac_verify(struct carp_vhost_entry *, u_int32_t *,
  210             unsigned char *);           /* 0 when the digest matches any context, 1 otherwise */
  211 void    carp_proto_input_c(struct ifnet *, struct mbuf *,
  212             struct carp_header *, int, sa_family_t);    /* shared IPv4/IPv6 back end; frees the mbuf */
  213 int     carp_proto_input_if(struct ifnet *, struct mbuf **, int *, int);
  214 #ifdef INET6
  215 int     carp6_proto_input_if(struct ifnet *, struct mbuf **, int *, int);
  216 #endif
  217 void    carpattach(int);
  218 void    carpdetach(void *);
  219 void    carp_prepare_ad(struct mbuf *, struct carp_vhost_entry *,
  220             struct carp_header *);
  221 void    carp_send_ad_all(void);
  222 void    carp_vhe_send_ad_all(struct carp_softc *);
  223 void    carp_timer_ad(void *);
  224 void    carp_send_ad(struct carp_vhost_entry *);
  225 void    carp_send_arp(struct carp_softc *);
  226 void    carp_timer_down(void *);
  227 void    carp_master_down(struct carp_vhost_entry *);
  228 int     carp_ioctl(struct ifnet *, u_long, caddr_t);
  229 int     carp_vhids_ioctl(struct carp_softc *, struct carpreq *);
  230 int     carp_check_dup_vhids(struct carp_softc *, struct srpl *,
  231             struct carpreq *);
  232 void    carp_ifgroup_ioctl(struct ifnet *, u_long, caddr_t);
  233 void    carp_ifgattr_ioctl(struct ifnet *, u_long, caddr_t);
  234 void    carp_start(struct ifnet *);
  235 int     carp_enqueue(struct ifnet *, struct mbuf *);
  236 void    carp_transmit(struct carp_softc *, struct ifnet *, struct mbuf *);
  237 void    carp_setrun_all(struct carp_softc *, sa_family_t);
  238 void    carp_setrun(struct carp_vhost_entry *, sa_family_t);
  239 void    carp_set_state_all(struct carp_softc *, int);
  240 void    carp_set_state(struct carp_vhost_entry *, int);
  241 void    carp_multicast_cleanup(struct carp_softc *);
  242 int     carp_set_ifp(struct carp_softc *, struct ifnet *);
  243 void    carp_set_enaddr(struct carp_softc *);
  244 void    carp_set_vhe_enaddr(struct carp_vhost_entry *);
  245 void    carp_addr_updated(void *);
  246 int     carp_set_addr(struct carp_softc *, struct sockaddr_in *);
  247 int     carp_join_multicast(struct carp_softc *);
  248 #ifdef INET6
  249 void    carp_send_na(struct carp_softc *);
  250 int     carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
  251 int     carp_join_multicast6(struct carp_softc *);
  252 #endif
  253 int     carp_clone_create(struct if_clone *, int);
  254 int     carp_clone_destroy(struct ifnet *);
  255 int     carp_ether_addmulti(struct carp_softc *, struct ifreq *);
  256 int     carp_ether_delmulti(struct carp_softc *, struct ifreq *);
  257 void    carp_ether_purgemulti(struct carp_softc *);
  258 int     carp_group_demote_count(struct carp_softc *);
  259 void    carp_update_lsmask(struct carp_softc *);
  260 int     carp_new_vhost(struct carp_softc *, int, int);
  261 void    carp_destroy_vhosts(struct carp_softc *);
  262 void    carp_del_all_timeouts(struct carp_softc *);
  263 int     carp_vhe_match(struct carp_softc *, uint64_t);
  264 
  265 struct if_clone carp_cloner =   /* handed to if_clone_attach() by carpattach() */
  266     IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy);
  267 
  268 #define carp_cksum(_m, _l)      ((u_int16_t)in_cksum((_m), (_l)))       /* in_cksum() result narrowed to 16 bits */
  269 #define CARP_IFQ_PRIO   6       /* NOTE(review): not referenced in the visible part of the file; presumably a queue priority -- confirm */
  270 
  271 void
  272 carp_hmac_prepare(struct carp_softc *sc)
  273 {
  274         struct carp_vhost_entry *vhe;
  275         u_int8_t i;
  276 
  277         KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
  278 
  279         SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
  280                 for (i = 0; i < HMAC_MAX; i++) {
  281                         carp_hmac_prepare_ctx(vhe, i);
  282                 }
  283         }
  284 }
  285 
  286 void
  287 carp_hmac_prepare_ctx(struct carp_vhost_entry *vhe, u_int8_t ctx)
  288 {
  289         struct carp_softc *sc = vhe->parent_sc;
  290 
  291         u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
  292         u_int8_t vhid = vhe->vhid & 0xff;
  293         SHA1_CTX sha1ctx;
  294         u_int32_t kmd[5];
  295         struct ifaddr *ifa;
  296         int i, found;
  297         struct in_addr last, cur, in;
  298 #ifdef INET6
  299         struct in6_addr last6, cur6, in6;
  300 #endif /* INET6 */
  301 
  302         /* compute ipad from key */
  303         memset(vhe->vhe_pad, 0, sizeof(vhe->vhe_pad));
  304         bcopy(sc->sc_key, vhe->vhe_pad, sizeof(sc->sc_key));
  305         for (i = 0; i < sizeof(vhe->vhe_pad); i++)
  306                 vhe->vhe_pad[i] ^= 0x36;
  307 
  308         /* precompute first part of inner hash */
  309         SHA1Init(&vhe->vhe_sha1[ctx]);
  310         SHA1Update(&vhe->vhe_sha1[ctx], vhe->vhe_pad, sizeof(vhe->vhe_pad));
  311         SHA1Update(&vhe->vhe_sha1[ctx], (void *)&version, sizeof(version));
  312         SHA1Update(&vhe->vhe_sha1[ctx], (void *)&type, sizeof(type));
  313 
  314         /* generate a key for the arpbalance hash, before the vhid is hashed */
  315         if (vhe->vhe_leader) {
  316                 bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx));
  317                 SHA1Final((unsigned char *)kmd, &sha1ctx);
  318                 sc->sc_hashkey[0] = kmd[0] ^ kmd[1];
  319                 sc->sc_hashkey[1] = kmd[2] ^ kmd[3];
  320         }
  321 
  322         /* the rest of the precomputation */
  323         if (!sc->sc_realmac && vhe->vhe_leader &&
  324             memcmp(sc->sc_ac.ac_enaddr, vhe->vhe_enaddr, ETHER_ADDR_LEN) != 0)
  325                 SHA1Update(&vhe->vhe_sha1[ctx], sc->sc_ac.ac_enaddr,
  326                     ETHER_ADDR_LEN);
  327 
  328         SHA1Update(&vhe->vhe_sha1[ctx], (void *)&vhid, sizeof(vhid));
  329 
  330         /* Hash the addresses from smallest to largest, not interface order */
  331         cur.s_addr = 0;
  332         do {
  333                 found = 0;
  334                 last = cur;
  335                 cur.s_addr = 0xffffffff;
  336                 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
  337                         if (ifa->ifa_addr->sa_family != AF_INET)
  338                                 continue;
  339                         in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
  340                         if (ntohl(in.s_addr) > ntohl(last.s_addr) &&
  341                             ntohl(in.s_addr) < ntohl(cur.s_addr)) {
  342                                 cur.s_addr = in.s_addr;
  343                                 found++;
  344                         }
  345                 }
  346                 if (found)
  347                         SHA1Update(&vhe->vhe_sha1[ctx],
  348                             (void *)&cur, sizeof(cur));
  349         } while (found);
  350 #ifdef INET6
  351         memset(&cur6, 0x00, sizeof(cur6));
  352         do {
  353                 found = 0;
  354                 last6 = cur6;
  355                 memset(&cur6, 0xff, sizeof(cur6));
  356                 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
  357                         if (ifa->ifa_addr->sa_family != AF_INET6)
  358                                 continue;
  359                         in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
  360                         if (IN6_IS_SCOPE_EMBED(&in6)) {
  361                                 if (ctx == HMAC_NOV6LL)
  362                                         continue;
  363                                 in6.s6_addr16[1] = 0;
  364                         }
  365                         if (memcmp(&in6, &last6, sizeof(in6)) > 0 &&
  366                             memcmp(&in6, &cur6, sizeof(in6)) < 0) {
  367                                 cur6 = in6;
  368                                 found++;
  369                         }
  370                 }
  371                 if (found)
  372                         SHA1Update(&vhe->vhe_sha1[ctx],
  373                             (void *)&cur6, sizeof(cur6));
  374         } while (found);
  375 #endif /* INET6 */
  376 
  377         /* convert ipad to opad */
  378         for (i = 0; i < sizeof(vhe->vhe_pad); i++)
  379                 vhe->vhe_pad[i] ^= 0x36 ^ 0x5c;
  380 }
  381 
  382 void
  383 carp_hmac_generate(struct carp_vhost_entry *vhe, u_int32_t counter[2],
  384     unsigned char md[20], u_int8_t ctx)
  385 {
  386         SHA1_CTX sha1ctx;
  387 
  388         /* fetch first half of inner hash */
  389         bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx));
  390 
  391         SHA1Update(&sha1ctx, (void *)counter, sizeof(vhe->vhe_replay_cookie));
  392         SHA1Final(md, &sha1ctx);
  393 
  394         /* outer hash */
  395         SHA1Init(&sha1ctx);
  396         SHA1Update(&sha1ctx, vhe->vhe_pad, sizeof(vhe->vhe_pad));
  397         SHA1Update(&sha1ctx, md, 20);
  398         SHA1Final(md, &sha1ctx);
  399 }
  400 
  401 int
  402 carp_hmac_verify(struct carp_vhost_entry *vhe, u_int32_t counter[2],
  403     unsigned char md[20])
  404 {
  405         unsigned char md2[20];
  406         u_int8_t i;
  407 
  408         for (i = 0; i < HMAC_MAX; i++) {
  409                 carp_hmac_generate(vhe, counter, md2, i);
  410                 if (!timingsafe_bcmp(md, md2, sizeof(md2)))
  411                         return (0);
  412         }
  413         return (1);
  414 }
  415 
  416 int
  417 carp_proto_input(struct mbuf **mp, int *offp, int proto, int af)
  418 {
  419         struct ifnet *ifp;
  420 
  421         ifp = if_get((*mp)->m_pkthdr.ph_ifidx);
  422         if (ifp == NULL) {
  423                 m_freemp(mp);
  424                 return IPPROTO_DONE;
  425         }
  426 
  427         proto = carp_proto_input_if(ifp, mp, offp, proto);
  428         if_put(ifp);
  429         return proto;
  430 }
  431 
  432 /*
  433  * process input packet.
  434  * we have rearranged checks order compared to the rfc,
  435  * but it seems more efficient this way or not possible otherwise.
  436  */
  437 int
  438 carp_proto_input_if(struct ifnet *ifp, struct mbuf **mp, int *offp, int proto)
  439 {
  440         struct mbuf *m = *mp;
  441         struct ip *ip = mtod(m, struct ip *);
  442         struct carp_softc *sc = NULL;
  443         struct carp_header *ch;
  444         int iplen, len, ismulti;
  445 
  446         carpstat_inc(carps_ipackets);
  447 
  448         if (!carp_opts[CARPCTL_ALLOW]) {
  449                 m_freem(m);
  450                 return IPPROTO_DONE;
  451         }
  452 
  453         ismulti = IN_MULTICAST(ip->ip_dst.s_addr);
  454 
  455         /* check if received on a valid carp interface */
  456         switch (ifp->if_type) {
  457         case IFT_CARP:
  458                 break;
  459         case IFT_ETHER:
  460                 if (ismulti || !SRPL_EMPTY_LOCKED(&ifp->if_carp))
  461                         break;
  462                 /* FALLTHROUGH */
  463         default:
  464                 carpstat_inc(carps_badif);
  465                 CARP_LOG(LOG_INFO, sc,
  466                     ("packet received on non-carp interface: %s",
  467                      ifp->if_xname));
  468                 m_freem(m);
  469                 return IPPROTO_DONE;
  470         }
  471 
  472         /* verify that the IP TTL is 255.  */
  473         if (ip->ip_ttl != CARP_DFLTTL) {
  474                 carpstat_inc(carps_badttl);
  475                 CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s",
  476                     ip->ip_ttl, CARP_DFLTTL, ifp->if_xname));
  477                 m_freem(m);
  478                 return IPPROTO_DONE;
  479         }
  480 
  481         /*
  482          * verify that the received packet length is
  483          * equal to the CARP header
  484          */
  485         iplen = ip->ip_hl << 2;
  486         len = iplen + sizeof(*ch);
  487         if (len > m->m_pkthdr.len) {
  488                 carpstat_inc(carps_badlen);
  489                 CARP_LOG(LOG_INFO, sc, ("packet too short %d on %s",
  490                     m->m_pkthdr.len, ifp->if_xname));
  491                 m_freem(m);
  492                 return IPPROTO_DONE;
  493         }
  494 
  495         if ((m = *mp = m_pullup(m, len)) == NULL) {
  496                 carpstat_inc(carps_hdrops);
  497                 return IPPROTO_DONE;
  498         }
  499         ip = mtod(m, struct ip *);
  500         ch = (struct carp_header *)(mtod(m, caddr_t) + iplen);
  501 
  502         /* verify the CARP checksum */
  503         m->m_data += iplen;
  504         if (carp_cksum(m, len - iplen)) {
  505                 carpstat_inc(carps_badsum);
  506                 CARP_LOG(LOG_INFO, sc, ("checksum failed on %s",
  507                     ifp->if_xname));
  508                 m_freem(m);
  509                 return IPPROTO_DONE;
  510         }
  511         m->m_data -= iplen;
  512 
  513         KERNEL_LOCK();
  514         carp_proto_input_c(ifp, m, ch, ismulti, AF_INET);
  515         KERNEL_UNLOCK();
  516         return IPPROTO_DONE;
  517 }
  518 
  519 #ifdef INET6
  520 int
  521 carp6_proto_input(struct mbuf **mp, int *offp, int proto, int af)
  522 {
  523         struct ifnet *ifp;
  524 
  525         ifp = if_get((*mp)->m_pkthdr.ph_ifidx);
  526         if (ifp == NULL) {
  527                 m_freemp(mp);
  528                 return IPPROTO_DONE;
  529         }
  530 
  531         proto = carp6_proto_input_if(ifp, mp, offp, proto);
  532         if_put(ifp);
  533         return proto;
  534 }
  535 
  536 int
  537 carp6_proto_input_if(struct ifnet *ifp, struct mbuf **mp, int *offp, int proto)
  538 {
  539         struct mbuf *m = *mp;
  540         struct carp_softc *sc = NULL;
  541         struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
  542         struct carp_header *ch;
  543         u_int len;
  544 
  545         carpstat_inc(carps_ipackets6);
  546 
  547         if (!carp_opts[CARPCTL_ALLOW]) {
  548                 m_freem(m);
  549                 return IPPROTO_DONE;
  550         }
  551 
  552         /* check if received on a valid carp interface */
  553         if (ifp->if_type != IFT_CARP) {
  554                 carpstat_inc(carps_badif);
  555                 CARP_LOG(LOG_INFO, sc, ("packet received on non-carp interface: %s",
  556                     ifp->if_xname));
  557                 m_freem(m);
  558                 return IPPROTO_DONE;
  559         }
  560 
  561         /* verify that the IP TTL is 255 */
  562         if (ip6->ip6_hlim != CARP_DFLTTL) {
  563                 carpstat_inc(carps_badttl);
  564                 CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s",
  565                     ip6->ip6_hlim, CARP_DFLTTL, ifp->if_xname));
  566                 m_freem(m);
  567                 return IPPROTO_DONE;
  568         }
  569 
  570         /* verify that we have a complete carp packet */
  571         len = m->m_len;
  572         if ((m = *mp = m_pullup(m, *offp + sizeof(*ch))) == NULL) {
  573                 carpstat_inc(carps_badlen);
  574                 CARP_LOG(LOG_INFO, sc, ("packet size %u too small", len));
  575                 return IPPROTO_DONE;
  576         }
  577         ch = (struct carp_header *)(mtod(m, caddr_t) + *offp);
  578 
  579         /* verify the CARP checksum */
  580         m->m_data += *offp;
  581         if (carp_cksum(m, sizeof(*ch))) {
  582                 carpstat_inc(carps_badsum);
  583                 CARP_LOG(LOG_INFO, sc, ("checksum failed, on %s",
  584                     ifp->if_xname));
  585                 m_freem(m);
  586                 return IPPROTO_DONE;
  587         }
  588         m->m_data -= *offp;
  589 
  590         KERNEL_LOCK();
  591         carp_proto_input_c(ifp, m, ch, 1, AF_INET6);
  592         KERNEL_UNLOCK();
  593         return IPPROTO_DONE;
  594 }
  595 #endif /* INET6 */
  596 
  597 void
  598 carp_proto_input_c(struct ifnet *ifp, struct mbuf *m, struct carp_header *ch,
  599     int ismulti, sa_family_t af)
  600 {
              /*
               * Address-family independent handling of a received CARP
               * advertisement `ch' (header inside mbuf `m').  Finds the
               * softc and vhost entry matching the advertised vhid, checks
               * version, HMAC and replay cookie, then runs the
               * MASTER/BACKUP logic for that entry.  Every path ends at
               * `rele', which releases ifp0 and frees `m'.
               */
  601         struct carp_softc *sc;
  602         struct ifnet *ifp0;
  603         struct carp_vhost_entry *vhe;
  604         struct timeval sc_tv, ch_tv;
  605         struct srpl *cif;
  606 
  607         KERNEL_ASSERT_LOCKED(); /* touching if_carp + carp_vhosts */
  608 
  609         ifp0 = if_get(ifp->if_carpdevidx);
  610 
              /*
               * Search the if_carp list of the parent when the packet came
               * in on a carp interface, otherwise that of `ifp' itself.
               */
  611         if (ifp->if_type == IFT_CARP) {
  612                 /*
  613                  * If the parent of this carp(4) got destroyed while
  614                  * `m' was being processed, silently drop it.
  615                  */
  616                 if (ifp0 == NULL)
  617                         goto rele;
  618                 cif = &ifp0->if_carp;
  619         } else
  620                 cif = &ifp->if_carp;
  621 
              /*
               * For IPv4, skip softcs whose peer address multicast-ness
               * differs from that of the packet destination.
               */
  622         SRPL_FOREACH_LOCKED(sc, cif, sc_list) {
  623                 if (af == AF_INET &&
  624                     ismulti != IN_MULTICAST(sc->sc_peer.s_addr))
  625                         continue;
  626                 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
  627                         if (vhe->vhid == ch->carp_vhid)
  628                                 goto found;
  629                 }
  630         }
  631  found:
  632 
              /*
               * NOTE(review): the !sc test relies on the outer iterator
               * leaving `sc' NULL when no entry matched -- confirm against
               * the SRPL_FOREACH_LOCKED definition.
               */
  633         if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
  634             (IFF_UP|IFF_RUNNING)) {
  635                 carpstat_inc(carps_badvhid);
  636                 goto rele;
  637         }
  638 
  639         getmicrotime(&sc->sc_if.if_lastchange);
  640 
  641         /* verify the CARP version. */
  642         if (ch->carp_version != CARP_VERSION) {
  643                 carpstat_inc(carps_badver);
  644                 sc->sc_if.if_ierrors++;
  645                 CARP_LOG(LOG_NOTICE, sc, ("invalid version %d != %d",
  646                     ch->carp_version, CARP_VERSION));
  647                 goto rele;
  648         }
  649 
  650         /* verify the hash */
  651         if (carp_hmac_verify(vhe, ch->carp_counter, ch->carp_md)) {
  652                 carpstat_inc(carps_badauth);
  653                 sc->sc_if.if_ierrors++;
  654                 CARP_LOG(LOG_INFO, sc, ("incorrect hash"));
  655                 goto rele;
  656         }
  657 
              /*
               * A counter equal to our own replay cookie is dropped; it is
               * only counted and logged when the parent device has
               * IFF_SIMPLEX set.
               */
  658         if (!memcmp(&vhe->vhe_replay_cookie, ch->carp_counter,
  659             sizeof(ch->carp_counter))) {
  660                 struct ifnet *ifp2;
  661 
  662                 ifp2 = if_get(sc->sc_carpdevidx);
  663                 /* Do not log duplicates from non simplex interfaces */
  664                 if (ifp2 && ifp2->if_flags & IFF_SIMPLEX) {
  665                         carpstat_inc(carps_badauth);
  666                         sc->sc_if.if_ierrors++;
  667                         CARP_LOG(LOG_WARNING, sc,
  668                             ("replay or network loop detected"));
  669                 }
  670                 if_put(ifp2);
  671                 goto rele;
  672         }
  673 
              /*
               * Advertisement intervals as timevals: advbase seconds plus
               * advskew/256 of a second, ours (sc_tv) and the sender's (ch_tv).
               */
  674         sc_tv.tv_sec = sc->sc_advbase;
  675         sc_tv.tv_usec = vhe->advskew * 1000000 / 256;
  676         ch_tv.tv_sec = ch->carp_advbase;
  677         ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
  678 
  679         switch (vhe->state) {
  680         case INIT:
  681                 break;
  682         case MASTER:
  683                 /*
  684                  * If we receive an advertisement from a master who advertises
  685                  * at least as often as we do and whose demote count is not
  686                  * higher than ours, go into BACKUP state. If his demote count
  687                  * is lower, also go into BACKUP.
  688                  */
  689                 if (((timercmp(&sc_tv, &ch_tv, >) ||
  690                     timercmp(&sc_tv, &ch_tv, ==)) &&
  691                     (ch->carp_demote <= carp_group_demote_count(sc))) ||
  692                     ch->carp_demote < carp_group_demote_count(sc)) {
  693                         timeout_del(&vhe->ad_tmo);
  694                         carp_set_state(vhe, BACKUP);
  695                         carp_setrun(vhe, 0);
  696                 }
  697                 break;
  698         case BACKUP:
  699                 /*
  700                  * If we're pre-empting masters who advertise slower than us,
  701                  * and do not have a better demote count, treat them as down.
  702                  *
  703                  */
  704                 if (carp_opts[CARPCTL_PREEMPT] &&
  705                     timercmp(&sc_tv, &ch_tv, <) &&
  706                     ch->carp_demote >= carp_group_demote_count(sc)) {
  707                         carp_master_down(vhe);
  708                         break;
  709                 }
  710 
  711                 /*
  712                  * Take over masters advertising with a higher demote count,
  713                  * regardless of CARPCTL_PREEMPT.
  714                  */
  715                 if (ch->carp_demote > carp_group_demote_count(sc)) {
  716                         carp_master_down(vhe);
  717                         break;
  718                 }
  719 
  720                 /*
  721                  *  If the master is going to advertise at such a low frequency
  722                  *  that he's guaranteed to time out, we'd might as well just
  723                  *  treat him as timed out now.
  724                  */
  725                 sc_tv.tv_sec = sc->sc_advbase * 3;
  726                 if (sc->sc_advbase && timercmp(&sc_tv, &ch_tv, <)) {
  727                         carp_master_down(vhe);
  728                         break;
  729                 }
  730 
  731                 /*
  732                  * Otherwise, we reset the counter and wait for the next
  733                  * advertisement.
  734                  */
  735                 carp_setrun(vhe, af);
  736                 break;
  737         }
  738 
  739 rele:
  740         if_put(ifp0);
  741         m_freem(m);
  742         return;
  743 }
  744 
  745 int
  746 carp_sysctl_carpstat(void *oldp, size_t *oldlenp, void *newp)
  747 {
  748         struct carpstats carpstat;
  749 
  750         CTASSERT(sizeof(carpstat) == (carps_ncounters * sizeof(uint64_t)));
  751         memset(&carpstat, 0, sizeof carpstat);
  752         counters_read(carpcounters, (uint64_t *)&carpstat, carps_ncounters);
  753         return (sysctl_rdstruct(oldp, oldlenp, newp,
  754             &carpstat, sizeof(carpstat)));
  755 }
  756 
  757 int
  758 carp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
  759     size_t newlen)
  760 {
  761         int error;
  762 
  763         /* All sysctl names at this level are terminal. */
  764         if (namelen != 1)
  765                 return (ENOTDIR);
  766 
  767         switch (name[0]) {
  768         case CARPCTL_STATS:
  769                 return (carp_sysctl_carpstat(oldp, oldlenp, newp));
  770         default:
  771                 if (name[0] <= 0 || name[0] >= CARPCTL_MAXID)
  772                         return (ENOPROTOOPT);
  773                 NET_LOCK();
  774                 error = sysctl_int(oldp, oldlenp, newp, newlen,
  775                     &carp_opts[name[0]]);
  776                 NET_UNLOCK();
  777                 return (error);
  778         }
  779 }
  780 
  781 /*
  782  * Interface side of the CARP implementation.
  783  */
  784 
  785 /* ARGSUSED */
  786 void
  787 carpattach(int n)
  788 {
  789         if_creategroup("carp");  /* keep around even if empty */
  790         if_clone_attach(&carp_cloner);
  791         carpcounters = counters_alloc(carps_ncounters);
  792 }
  793 
  794 int
  795 carp_clone_create(struct if_clone *ifc, int unit)
  796 {
  797         struct carp_softc *sc;
  798         struct ifnet *ifp;
  799 
  800         sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
  801         refcnt_init(&sc->sc_refcnt);
  802 
  803         SRPL_INIT(&sc->carp_vhosts);
  804         sc->sc_vhe_count = 0;
  805         if (carp_new_vhost(sc, 0, 0)) {
  806                 free(sc, M_DEVBUF, sizeof(*sc));
  807                 return (ENOMEM);
  808         }
  809 
  810         task_set(&sc->sc_atask, carp_addr_updated, sc);
  811         task_set(&sc->sc_ltask, carp_carpdev_state, sc);
  812         task_set(&sc->sc_dtask, carpdetach, sc);
  813 
  814         sc->sc_suppress = 0;
  815         sc->sc_advbase = CARP_DFLTINTV;
  816         sc->sc_naddrs = sc->sc_naddrs6 = 0;
  817 #ifdef INET6
  818         sc->sc_im6o.im6o_hlim = CARP_DFLTTL;
  819 #endif /* INET6 */
  820         sc->sc_imo.imo_membership = mallocarray(IP_MIN_MEMBERSHIPS,
  821             sizeof(struct in_multi *), M_IPMOPTS, M_WAITOK|M_ZERO);
  822         sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
  823 
  824         LIST_INIT(&sc->carp_mc_listhead);
  825         ifp = &sc->sc_if;
  826         ifp->if_softc = sc;
  827         snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name,
  828             unit);
  829         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
  830         ifp->if_ioctl = carp_ioctl;
  831         ifp->if_start = carp_start;
  832         ifp->if_enqueue = carp_enqueue;
  833         ifp->if_xflags = IFXF_CLONED;
  834         if_counters_alloc(ifp);
  835         if_attach(ifp);
  836         ether_ifattach(ifp);
  837         ifp->if_type = IFT_CARP;
  838         ifp->if_sadl->sdl_type = IFT_CARP;
  839         ifp->if_output = carp_output;
  840         ifp->if_priority = IF_CARP_DEFAULT_PRIORITY;
  841         ifp->if_link_state = LINK_STATE_INVALID;
  842 
  843         /* Hook carp_addr_updated to cope with address and route changes. */
  844         if_addrhook_add(&sc->sc_if, &sc->sc_atask);
  845 
  846         return (0);
  847 }
  848 
  849 int
  850 carp_new_vhost(struct carp_softc *sc, int vhid, int advskew)
  851 {
  852         struct carp_vhost_entry *vhe, *vhe0;
  853 
  854         vhe = malloc(sizeof(*vhe), M_DEVBUF, M_NOWAIT | M_ZERO);
  855         if (vhe == NULL)
  856                 return (ENOMEM);
  857 
  858         refcnt_init(&vhe->vhost_refcnt);
  859         carp_sc_ref(NULL, sc); /* give a sc ref to the vhe */
  860         vhe->parent_sc = sc;
  861         vhe->vhid = vhid;
  862         vhe->advskew = advskew;
  863         vhe->state = INIT;
  864         timeout_set_proc(&vhe->ad_tmo, carp_timer_ad, vhe);
  865         timeout_set_proc(&vhe->md_tmo, carp_timer_down, vhe);
  866         timeout_set_proc(&vhe->md6_tmo, carp_timer_down, vhe);
  867 
  868         KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
  869 
  870         /* mark the first vhe as leader */
  871         if (SRPL_EMPTY_LOCKED(&sc->carp_vhosts)) {
  872                 vhe->vhe_leader = 1;
  873                 SRPL_INSERT_HEAD_LOCKED(&carp_vh_rc, &sc->carp_vhosts,
  874                     vhe, vhost_entries);
  875                 sc->sc_vhe_count = 1;
  876                 return (0);
  877         }
  878 
  879         SRPL_FOREACH_LOCKED(vhe0, &sc->carp_vhosts, vhost_entries) {
  880                 if (SRPL_NEXT_LOCKED(vhe0, vhost_entries) == NULL)
  881                         break;
  882         }
  883 
  884         SRPL_INSERT_AFTER_LOCKED(&carp_vh_rc, vhe0, vhe, vhost_entries);
  885         sc->sc_vhe_count++;
  886 
  887         return (0);
  888 }
  889 
  890 int
  891 carp_clone_destroy(struct ifnet *ifp)
  892 {
  893         struct carp_softc *sc = ifp->if_softc;
  894 
  895         if_addrhook_del(&sc->sc_if, &sc->sc_atask);
  896 
  897         NET_LOCK();
  898         carpdetach(sc);
  899         NET_UNLOCK();
  900 
  901         ether_ifdetach(ifp);
  902         if_detach(ifp);
  903         carp_destroy_vhosts(ifp->if_softc);
  904         refcnt_finalize(&sc->sc_refcnt, "carpdtor");
  905         free(sc->sc_imo.imo_membership, M_IPMOPTS,
  906             sc->sc_imo.imo_max_memberships * sizeof(struct in_multi *));
  907         free(sc, M_DEVBUF, sizeof(*sc));
  908         return (0);
  909 }
  910 
  911 void
  912 carp_del_all_timeouts(struct carp_softc *sc)
  913 {
  914         struct carp_vhost_entry *vhe;
  915 
  916         KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
  917         SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
  918                 timeout_del(&vhe->ad_tmo);
  919                 timeout_del(&vhe->md_tmo);
  920                 timeout_del(&vhe->md6_tmo);
  921         }
  922 }
  923 
  924 void
  925 carpdetach(void *arg)
  926 {
  927         struct carp_softc *sc = arg;
  928         struct ifnet *ifp0;
  929         struct srpl *cif;
  930 
  931         carp_del_all_timeouts(sc);
  932 
  933         if (sc->sc_demote_cnt)
  934                 carp_group_demote_adj(&sc->sc_if, -sc->sc_demote_cnt, "detach");
  935         sc->sc_suppress = 0;
  936         sc->sc_sendad_errors = 0;
  937 
  938         carp_set_state_all(sc, INIT);
  939         sc->sc_if.if_flags &= ~IFF_UP;
  940         carp_setrun_all(sc, 0);
  941         carp_multicast_cleanup(sc);
  942 
  943         ifp0 = if_get(sc->sc_carpdevidx);
  944         if (ifp0 == NULL)
  945                 return;
  946 
  947         KERNEL_ASSERT_LOCKED(); /* touching if_carp */
  948 
  949         cif = &ifp0->if_carp;
  950 
  951         SRPL_REMOVE_LOCKED(&carp_sc_rc, cif, sc, carp_softc, sc_list);
  952         sc->sc_carpdevidx = 0;
  953 
  954         if_linkstatehook_del(ifp0, &sc->sc_ltask);
  955         if_detachhook_del(ifp0, &sc->sc_dtask);
  956         ifpromisc(ifp0, 0);
  957         if_put(ifp0);
  958 }
  959 
  960 void
  961 carp_destroy_vhosts(struct carp_softc *sc)
  962 {
  963         /* XXX bow out? */
  964         struct carp_vhost_entry *vhe;
  965 
  966         KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
  967 
  968         while ((vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts)) != NULL) {
  969                 SRPL_REMOVE_LOCKED(&carp_vh_rc, &sc->carp_vhosts, vhe,
  970                     carp_vhost_entry, vhost_entries);
  971                 carp_vh_unref(NULL, vhe); /* drop last ref */
  972         }
  973         sc->sc_vhe_count = 0;
  974 }
  975 
  976 void
  977 carp_prepare_ad(struct mbuf *m, struct carp_vhost_entry *vhe,
  978     struct carp_header *ch)
  979 {
  980         if (!vhe->vhe_replay_cookie) {
  981                 arc4random_buf(&vhe->vhe_replay_cookie,
  982                     sizeof(vhe->vhe_replay_cookie));
  983         }
  984 
  985         bcopy(&vhe->vhe_replay_cookie, ch->carp_counter,
  986             sizeof(ch->carp_counter));
  987 
  988         /*
  989          * For the time being, do not include the IPv6 linklayer addresses
  990          * in the HMAC.
  991          */
  992         carp_hmac_generate(vhe, ch->carp_counter, ch->carp_md, HMAC_NOV6LL);
  993 }
  994 
  995 void
  996 carp_send_ad_all(void)
  997 {
  998         struct ifnet *ifp0;
  999         struct srpl *cif;
 1000         struct carp_softc *vh;
 1001 
 1002         KERNEL_ASSERT_LOCKED(); /* touching if_carp */
 1003 
 1004         if (carp_send_all_recur > 0)
 1005                 return;
 1006         ++carp_send_all_recur;
 1007         TAILQ_FOREACH(ifp0, &ifnetlist, if_list) {
 1008                 if (ifp0->if_type != IFT_ETHER)
 1009                         continue;
 1010 
 1011                 cif = &ifp0->if_carp;
 1012                 SRPL_FOREACH_LOCKED(vh, cif, sc_list) {
 1013                         if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
 1014                             (IFF_UP|IFF_RUNNING)) {
 1015                                 carp_vhe_send_ad_all(vh);
 1016                         }
 1017                 }
 1018         }
 1019         --carp_send_all_recur;
 1020 }
 1021 
 1022 void
 1023 carp_vhe_send_ad_all(struct carp_softc *sc)
 1024 {
 1025         struct carp_vhost_entry *vhe;
 1026 
 1027         KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
 1028 
 1029         SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
 1030                 if (vhe->state == MASTER)
 1031                         carp_send_ad(vhe);
 1032         }
 1033 }
 1034 
 /*
  * Advertisement timeout handler: v is the vhost entry whose ad_tmo fired.
  * Sends the advertisement with the net lock held.
  */
 void
 carp_timer_ad(void *v)
 {
         struct carp_vhost_entry *vhe = v;

         NET_LOCK();
         carp_send_ad(vhe);
         NET_UNLOCK();
 }
 1042 
 1043 void
 1044 carp_send_ad(struct carp_vhost_entry *vhe)
 1045 {
 1046         struct carp_header ch;
 1047         struct timeval tv;
 1048         struct carp_softc *sc = vhe->parent_sc;
 1049         struct carp_header *ch_ptr;
 1050         struct mbuf *m;
 1051         int error, len, advbase, advskew;
 1052         struct ifnet *ifp;
 1053         struct ifaddr *ifa;
 1054         struct sockaddr sa;
 1055 
 1056         NET_ASSERT_LOCKED();
 1057 
 1058         if ((ifp = if_get(sc->sc_carpdevidx)) == NULL) {
 1059                 sc->sc_if.if_oerrors++;
 1060                 return;
 1061         }
 1062 
 1063         /* bow out if we've gone to backup (the carp interface is going down) */
 1064         if (sc->sc_bow_out) {
 1065                 advbase = 255;
 1066                 advskew = 255;
 1067         } else {
 1068                 advbase = sc->sc_advbase;
 1069                 advskew = vhe->advskew;
 1070                 tv.tv_sec = advbase;
 1071                 if (advbase == 0 && advskew == 0)
 1072                         tv.tv_usec = 1 * 1000000 / 256;
 1073                 else
 1074                         tv.tv_usec = advskew * 1000000 / 256;
 1075         }
 1076 
 1077         ch.carp_version = CARP_VERSION;
 1078         ch.carp_type = CARP_ADVERTISEMENT;
 1079         ch.carp_vhid = vhe->vhid;
 1080         ch.carp_demote = carp_group_demote_count(sc) & 0xff;
 1081         ch.carp_advbase = advbase;
 1082         ch.carp_advskew = advskew;
 1083         ch.carp_authlen = 7;    /* XXX DEFINE */
 1084         ch.carp_cksum = 0;
 1085 
 1086         sc->cur_vhe = vhe; /* we need the vhe later on the output path */
 1087 
 1088         if (sc->sc_naddrs) {
 1089                 struct ip *ip;
 1090 
 1091                 MGETHDR(m, M_DONTWAIT, MT_HEADER);
 1092                 if (m == NULL) {
 1093                         sc->sc_if.if_oerrors++;
 1094                         carpstat_inc(carps_onomem);
 1095                         /* XXX maybe less ? */
 1096                         goto retry_later;
 1097                 }
 1098                 len = sizeof(*ip) + sizeof(ch);
 1099                 m->m_pkthdr.pf.prio = CARP_IFQ_PRIO;
 1100                 m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
 1101                 m->m_pkthdr.len = len;
 1102                 m->m_len = len;
 1103                 m_align(m, len);
 1104                 ip = mtod(m, struct ip *);
 1105                 ip->ip_v = IPVERSION;
 1106                 ip->ip_hl = sizeof(*ip) >> 2;
 1107                 ip->ip_tos = IPTOS_LOWDELAY;
 1108                 ip->ip_len = htons(len);
 1109                 ip->ip_id = htons(ip_randomid());
 1110                 ip->ip_off = htons(IP_DF);
 1111                 ip->ip_ttl = CARP_DFLTTL;
 1112                 ip->ip_p = IPPROTO_CARP;
 1113                 ip->ip_sum = 0;
 1114 
 1115                 memset(&sa, 0, sizeof(sa));
 1116                 sa.sa_family = AF_INET;
 1117                 /* Prefer addresses on the parent interface as source for AD. */
 1118                 ifa = ifaof_ifpforaddr(&sa, ifp);
 1119                 if (ifa == NULL)
 1120                         ifa = ifaof_ifpforaddr(&sa, &sc->sc_if);
 1121                 KASSERT(ifa != NULL);
 1122                 ip->ip_src.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
 1123                 ip->ip_dst.s_addr = sc->sc_peer.s_addr;
 1124                 if (IN_MULTICAST(ip->ip_dst.s_addr))
 1125                         m->m_flags |= M_MCAST;
 1126 
 1127                 ch_ptr = (struct carp_header *)(ip + 1);
 1128                 bcopy(&ch, ch_ptr, sizeof(ch));
 1129                 carp_prepare_ad(m, vhe, ch_ptr);
 1130 
 1131                 m->m_data += sizeof(*ip);
 1132                 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
 1133                 m->m_data -= sizeof(*ip);
 1134 
 1135                 getmicrotime(&sc->sc_if.if_lastchange);
 1136                 carpstat_inc(carps_opackets);
 1137 
 1138                 error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo,
 1139                     NULL, 0);
 1140                 if (error &&
 1141                     /* when unicast, the peer's down is not our fault */
 1142                     !(!IN_MULTICAST(sc->sc_peer.s_addr) && error == EHOSTDOWN)){
 1143                         if (error == ENOBUFS)
 1144                                 carpstat_inc(carps_onomem);
 1145                         else
 1146                                 CARP_LOG(LOG_WARNING, sc,
 1147                                     ("ip_output failed: %d", error));
 1148                         sc->sc_if.if_oerrors++;
 1149                         if (sc->sc_sendad_errors < INT_MAX)
 1150                                 sc->sc_sendad_errors++;
 1151                         if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc))
 1152                                 carp_group_demote_adj(&sc->sc_if, 1,
 1153                                     "> snderrors");
 1154                         sc->sc_sendad_success = 0;
 1155                 } else {
 1156                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) {
 1157                                 if (++sc->sc_sendad_success >=
 1158                                     CARP_SENDAD_MIN_SUCCESS(sc)) {
 1159                                         carp_group_demote_adj(&sc->sc_if, -1,
 1160                                             "< snderrors");
 1161                                         sc->sc_sendad_errors = 0;
 1162                                 }
 1163                         } else
 1164                                 sc->sc_sendad_errors = 0;
 1165                 }
 1166                 if (vhe->vhe_leader) {
 1167                         if (sc->sc_delayed_arp > 0)
 1168                                 sc->sc_delayed_arp--;
 1169                         if (sc->sc_delayed_arp == 0) {
 1170                                 carp_send_arp(sc);
 1171                                 sc->sc_delayed_arp = -1;
 1172                         }
 1173                 }
 1174         }
 1175 #ifdef INET6
 1176         if (sc->sc_naddrs6) {
 1177                 struct ip6_hdr *ip6;
 1178 
 1179                 MGETHDR(m, M_DONTWAIT, MT_HEADER);
 1180                 if (m == NULL) {
 1181                         sc->sc_if.if_oerrors++;
 1182                         carpstat_inc(carps_onomem);
 1183                         /* XXX maybe less ? */
 1184                         goto retry_later;
 1185                 }
 1186                 len = sizeof(*ip6) + sizeof(ch);
 1187                 m->m_pkthdr.pf.prio = CARP_IFQ_PRIO;
 1188                 m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
 1189                 m->m_pkthdr.len = len;
 1190                 m->m_len = len;
 1191                 m_align(m, len);
 1192                 m->m_flags |= M_MCAST;
 1193                 ip6 = mtod(m, struct ip6_hdr *);
 1194                 memset(ip6, 0, sizeof(*ip6));
 1195                 ip6->ip6_vfc |= IPV6_VERSION;
 1196                 ip6->ip6_hlim = CARP_DFLTTL;
 1197                 ip6->ip6_nxt = IPPROTO_CARP;
 1198 
 1199                 /* set the source address */
 1200                 memset(&sa, 0, sizeof(sa));
 1201                 sa.sa_family = AF_INET6;
 1202                 /* Prefer addresses on the parent interface as source for AD. */
 1203                 ifa = ifaof_ifpforaddr(&sa, ifp);
 1204                 if (ifa == NULL)
 1205                         ifa = ifaof_ifpforaddr(&sa, &sc->sc_if);
 1206                 KASSERT(ifa != NULL);
 1207                 bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr,
 1208                     &ip6->ip6_src, sizeof(struct in6_addr));
 1209                 /* set the multicast destination */
 1210 
 1211                 ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
 1212                 ip6->ip6_dst.s6_addr16[1] = htons(ifp->if_index);
 1213                 ip6->ip6_dst.s6_addr8[15] = 0x12;
 1214 
 1215                 ch_ptr = (struct carp_header *)(ip6 + 1);
 1216                 bcopy(&ch, ch_ptr, sizeof(ch));
 1217                 carp_prepare_ad(m, vhe, ch_ptr);
 1218 
 1219                 m->m_data += sizeof(*ip6);
 1220                 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6));
 1221                 m->m_data -= sizeof(*ip6);
 1222 
 1223                 getmicrotime(&sc->sc_if.if_lastchange);
 1224                 carpstat_inc(carps_opackets6);
 1225 
 1226                 error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL);
 1227                 if (error) {
 1228                         if (error == ENOBUFS)
 1229                                 carpstat_inc(carps_onomem);
 1230                         else
 1231                                 CARP_LOG(LOG_WARNING, sc,
 1232                                     ("ip6_output failed: %d", error));
 1233                         sc->sc_if.if_oerrors++;
 1234                         if (sc->sc_sendad_errors < INT_MAX)
 1235                                 sc->sc_sendad_errors++;
 1236                         if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc))
 1237                                 carp_group_demote_adj(&sc->sc_if, 1,
 1238                                             "> snd6errors");
 1239                         sc->sc_sendad_success = 0;
 1240                 } else {
 1241                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) {
 1242                                 if (++sc->sc_sendad_success >=
 1243                                     CARP_SENDAD_MIN_SUCCESS(sc)) {
 1244                                         carp_group_demote_adj(&sc->sc_if, -1,
 1245                                             "< snd6errors");
 1246                                         sc->sc_sendad_errors = 0;
 1247                                 }
 1248                         } else
 1249                                 sc->sc_sendad_errors = 0;
 1250                 }
 1251         }
 1252 #endif /* INET6 */
 1253 
 1254 retry_later:
 1255         sc->cur_vhe = NULL;
 1256         if (advbase != 255 || advskew != 255)
 1257                 timeout_add_tv(&vhe->ad_tmo, &tv);
 1258         if_put(ifp);
 1259 }
 1260 
 1261 /*
 1262  * Broadcast a gratuitous ARP request containing
 1263  * the virtual router MAC address for each IP address
 1264  * associated with the virtual router.
 1265  */
 1266 void
 1267 carp_send_arp(struct carp_softc *sc)
 1268 {
 1269         struct ifaddr *ifa;
 1270         in_addr_t in;
 1271 
 1272         TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
 1273 
 1274                 if (ifa->ifa_addr->sa_family != AF_INET)
 1275                         continue;
 1276 
 1277                 in = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
 1278                 arprequest(&sc->sc_if, &in, &in, sc->sc_ac.ac_enaddr);
 1279         }
 1280 }
 1281 
 1282 #ifdef INET6
 1283 void
 1284 carp_send_na(struct carp_softc *sc)
 1285 {
 1286         struct ifaddr *ifa;
 1287         struct in6_addr *in6;
 1288         static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
 1289 
 1290         TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
 1291 
 1292                 if (ifa->ifa_addr->sa_family != AF_INET6)
 1293                         continue;
 1294 
 1295                 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
 1296                 nd6_na_output(&sc->sc_if, &mcast, in6,
 1297                     ND_NA_FLAG_OVERRIDE |
 1298                     (ip6_forwarding ? ND_NA_FLAG_ROUTER : 0), 1, NULL);
 1299         }
 1300 }
 1301 #endif /* INET6 */
 1302 
 1303 void
 1304 carp_update_lsmask(struct carp_softc *sc)
 1305 {
 1306         struct carp_vhost_entry *vhe;
 1307         int count;
 1308 
 1309         if (sc->sc_balancing == CARP_BAL_NONE)
 1310                 return;
 1311 
 1312         sc->sc_lsmask = 0;
 1313         count = 0;
 1314 
 1315         KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
 1316         SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
 1317                 if (vhe->state == MASTER && count < sizeof(sc->sc_lsmask) * 8)
 1318                         sc->sc_lsmask |= 1 << count;
 1319                 count++;
 1320         }
 1321         sc->sc_lscount = count;
 1322         CARP_LOG(LOG_DEBUG, sc, ("carp_update_lsmask: %x", sc->sc_lsmask));
 1323 }
 1324 
 1325 int
 1326 carp_iamatch(struct ifnet *ifp)
 1327 {
 1328         struct carp_softc *sc = ifp->if_softc;
 1329         struct carp_vhost_entry *vhe;
 1330         struct srp_ref sr;
 1331         int match = 0;
 1332 
 1333         vhe = SRPL_FIRST(&sr, &sc->carp_vhosts);
 1334         if (vhe->state == MASTER)
 1335                 match = 1;
 1336         SRPL_LEAVE(&sr);
 1337 
 1338         return (match);
 1339 }
 1340 
 1341 int
 1342 carp_ourether(struct ifnet *ifp, uint8_t *ena)
 1343 {
 1344         struct srpl *cif = &ifp->if_carp;
 1345         struct carp_softc *sc;
 1346         struct srp_ref sr;
 1347         int match = 0;
 1348         uint64_t dst = ether_addr_to_e64((struct ether_addr *)ena);
 1349 
 1350         KASSERT(ifp->if_type == IFT_ETHER);
 1351 
 1352         SRPL_FOREACH(sc, &sr, cif, sc_list) {
 1353                 if ((sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
 1354                     (IFF_UP|IFF_RUNNING))
 1355                         continue;
 1356                 if (carp_vhe_match(sc, dst)) {
 1357                         match = 1;
 1358                         break;
 1359                 }
 1360         }
 1361         SRPL_LEAVE(&sr);
 1362 
 1363         return (match);
 1364 }
 1365 
 1366 int
 1367 carp_vhe_match(struct carp_softc *sc, uint64_t dst)
 1368 {
 1369         struct carp_vhost_entry *vhe;
 1370         struct srp_ref sr;
 1371         int active = 0;
 1372 
 1373         vhe = SRPL_FIRST(&sr, &sc->carp_vhosts);
 1374         active = (vhe->state == MASTER || sc->sc_balancing >= CARP_BAL_IP);
 1375         SRPL_LEAVE(&sr);
 1376 
 1377         return (active && (dst ==
 1378             ether_addr_to_e64((struct ether_addr *)sc->sc_ac.ac_enaddr)));
 1379 }
 1380 
 1381 struct mbuf *
 1382 carp_input(struct ifnet *ifp0, struct mbuf *m, uint64_t dst)
 1383 {
 1384         struct srpl *cif;
 1385         struct carp_softc *sc;
 1386         struct srp_ref sr;
 1387 
 1388         cif = &ifp0->if_carp;
 1389 
 1390         SRPL_FOREACH(sc, &sr, cif, sc_list) {
 1391                 if ((sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
 1392                     (IFF_UP|IFF_RUNNING))
 1393                         continue;
 1394 
 1395                 if (carp_vhe_match(sc, dst)) {
 1396                         /*
 1397                          * These packets look like layer 2 multicast but they
 1398                          * are unicast at layer 3. With help of the tag the
 1399                          * mbuf's M_MCAST flag can be removed by carp_lsdrop()
 1400                          * after we have passed layer 2.
 1401                          */
 1402                         if (sc->sc_balancing == CARP_BAL_IP) {
 1403                                 struct m_tag *mtag;
 1404                                 mtag = m_tag_get(PACKET_TAG_CARP_BAL_IP, 0,
 1405                                     M_NOWAIT);
 1406                                 if (mtag == NULL) {
 1407                                         m_freem(m);
 1408                                         goto out;
 1409                                 }
 1410                                 m_tag_prepend(m, mtag);
 1411                         }
 1412                         break;
 1413                 }
 1414         }
 1415 
 1416         if (sc == NULL) {
 1417                 SRPL_LEAVE(&sr);
 1418 
 1419                 if (!ETH64_IS_MULTICAST(dst))
 1420                         return (m);
 1421 
 1422                 /*
 1423                  * XXX Should really check the list of multicast addresses
 1424                  * for each CARP interface _before_ copying.
 1425                  */
 1426                 SRPL_FOREACH(sc, &sr, cif, sc_list) {
 1427                         struct mbuf *m0;
 1428 
 1429                         if (!(sc->sc_if.if_flags & IFF_UP))
 1430                                 continue;
 1431 
 1432                         m0 = m_dup_pkt(m, ETHER_ALIGN, M_DONTWAIT);
 1433                         if (m0 == NULL)
 1434                                 continue;
 1435 
 1436                         if_vinput(&sc->sc_if, m0);
 1437                 }
 1438                 SRPL_LEAVE(&sr);
 1439 
 1440                 return (m);
 1441         }
 1442 
 1443         if_vinput(&sc->sc_if, m);
 1444 out:
 1445         SRPL_LEAVE(&sr);
 1446 
 1447         return (NULL);
 1448 }
 1449 
 1450 int
 1451 carp_lsdrop(struct ifnet *ifp, struct mbuf *m, sa_family_t af, u_int32_t *src,
 1452     u_int32_t *dst, int drop)
 1453 {
 1454         struct carp_softc *sc;
 1455         u_int32_t fold;
 1456         struct m_tag *mtag;
 1457 
 1458         if (ifp->if_type != IFT_CARP)
 1459                 return 0;
 1460         sc = ifp->if_softc;
 1461         if (sc->sc_balancing == CARP_BAL_NONE)
 1462                 return 0;
 1463 
 1464         /*
 1465          * Remove M_MCAST flag from mbuf of balancing ip traffic, since the fact
 1466          * that it is layer 2 multicast does not implicate that it is also layer
 1467          * 3 multicast.
 1468          */
 1469         if (m->m_flags & M_MCAST &&
 1470             (mtag = m_tag_find(m, PACKET_TAG_CARP_BAL_IP, NULL))) {
 1471                 m_tag_delete(m, mtag);
 1472                 m->m_flags &= ~M_MCAST;
 1473         }
 1474 
 1475         /*
 1476          * Return without making a drop decision. This allows to clear the
 1477          * M_MCAST flag and do nothing else.
 1478          */
 1479         if (!drop)
 1480                 return 0;
 1481 
 1482         /*
 1483          * Never drop carp advertisements.
 1484          * XXX Bad idea to pass all broadcast / multicast traffic?
 1485          */
 1486         if (m->m_flags & (M_BCAST|M_MCAST))
 1487                 return 0;
 1488 
 1489         fold = src[0] ^ dst[0];
 1490 #ifdef INET6
 1491         if (af == AF_INET6) {
 1492                 int i;
 1493                 for (i = 1; i < 4; i++)
 1494                         fold ^= src[i] ^ dst[i];
 1495         }
 1496 #endif
 1497         if (sc->sc_lscount == 0) /* just to be safe */
 1498                 return 1;
 1499 
 1500         return ((1 << (ntohl(fold) % sc->sc_lscount)) & sc->sc_lsmask) == 0;
 1501 }
 1502 
 /*
  * Master-down timeout handler: v is the vhost entry whose md_tmo or
  * md6_tmo fired.  Runs the master-down transition with the net lock held.
  */
 void
 carp_timer_down(void *v)
 {
         struct carp_vhost_entry *vhe = v;

         NET_LOCK();
         carp_master_down(vhe);
         NET_UNLOCK();
 }
 1510 
 1511 void
 1512 carp_master_down(struct carp_vhost_entry *vhe)
 1513 {
 1514         struct carp_softc *sc = vhe->parent_sc;
 1515 
 1516         NET_ASSERT_LOCKED();
 1517 
 1518         switch (vhe->state) {
 1519         case INIT:
 1520                 printf("%s: master_down event in INIT state\n",
 1521                     sc->sc_if.if_xname);
 1522                 break;
 1523         case MASTER:
 1524                 break;
 1525         case BACKUP:
 1526                 carp_set_state(vhe, MASTER);
 1527                 carp_send_ad(vhe);
 1528                 if (sc->sc_balancing == CARP_BAL_NONE && vhe->vhe_leader) {
 1529                         carp_send_arp(sc);
 1530                         /* Schedule a delayed ARP to deal w/ some L3 switches */
 1531                         sc->sc_delayed_arp = 2;
 1532 #ifdef INET6
 1533                         carp_send_na(sc);
 1534 #endif /* INET6 */
 1535                 }
 1536                 carp_setrun(vhe, 0);
 1537                 carpstat_inc(carps_preempt);
 1538                 break;
 1539         }
 1540 }
 1541 
 1542 void
 1543 carp_setrun_all(struct carp_softc *sc, sa_family_t af)
 1544 {
 1545         struct carp_vhost_entry *vhe;
 1546 
 1547         KERNEL_ASSERT_LOCKED(); /* touching carp_vhost */
 1548         SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
 1549                 carp_setrun(vhe, af);
 1550         }
 1551 }
 1552 
 1553 /*
 1554  * When in backup state, af indicates whether to reset the master down timer
 1555  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
 1556  */
 1557 void
 1558 carp_setrun(struct carp_vhost_entry *vhe, sa_family_t af)
 1559 {
 1560         struct ifnet *ifp;
 1561         struct timeval tv;
 1562         struct carp_softc *sc = vhe->parent_sc;
 1563 
 1564         if ((ifp = if_get(sc->sc_carpdevidx)) == NULL) {
 1565                 sc->sc_if.if_flags &= ~IFF_RUNNING;
 1566                 carp_set_state_all(sc, INIT);
 1567                 return;
 1568         }
 1569 
 1570         if (memcmp(((struct arpcom *)ifp)->ac_enaddr,
 1571             sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN) == 0)
 1572                 sc->sc_realmac = 1;
 1573         else
 1574                 sc->sc_realmac = 0;
 1575 
 1576         if_put(ifp);
 1577 
 1578         if (sc->sc_if.if_flags & IFF_UP && vhe->vhid > 0 &&
 1579             (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) {
 1580                 sc->sc_if.if_flags |= IFF_RUNNING;
 1581         } else {
 1582                 sc->sc_if.if_flags &= ~IFF_RUNNING;
 1583                 return;
 1584         }
 1585 
 1586         switch (vhe->state) {
 1587         case INIT:
 1588                 carp_set_state(vhe, BACKUP);
 1589                 carp_setrun(vhe, 0);
 1590                 break;
 1591         case BACKUP:
 1592                 timeout_del(&vhe->ad_tmo);
 1593                 tv.tv_sec = 3 * sc->sc_advbase;
 1594                 if (sc->sc_advbase == 0 && vhe->advskew == 0)
 1595                         tv.tv_usec = 3 * 1000000 / 256;
 1596                 else if (sc->sc_advbase == 0)
 1597                         tv.tv_usec = 3 * vhe->advskew * 1000000 / 256;
 1598                 else
 1599                         tv.tv_usec = vhe->advskew * 1000000 / 256;
 1600                 if (vhe->vhe_leader)
 1601                         sc->sc_delayed_arp = -1;
 1602                 switch (af) {
 1603                 case AF_INET:
 1604                         timeout_add_tv(&vhe->md_tmo, &tv);
 1605                         break;
 1606 #ifdef INET6
 1607                 case AF_INET6:
 1608                         timeout_add_tv(&vhe->md6_tmo, &tv);
 1609                         break;
 1610 #endif /* INET6 */
 1611                 default:
 1612                         if (sc->sc_naddrs)
 1613                                 timeout_add_tv(&vhe->md_tmo, &tv);
 1614                         if (sc->sc_naddrs6)
 1615                                 timeout_add_tv(&vhe->md6_tmo, &tv);
 1616                         break;
 1617                 }
 1618                 break;
 1619         case MASTER:
 1620                 tv.tv_sec = sc->sc_advbase;
 1621                 if (sc->sc_advbase == 0 && vhe->advskew == 0)
 1622                         tv.tv_usec = 1 * 1000000 / 256;
 1623                 else
 1624                         tv.tv_usec = vhe->advskew * 1000000 / 256;
 1625                 timeout_add_tv(&vhe->ad_tmo, &tv);
 1626                 break;
 1627         }
 1628 }
 1629 
 1630 void
 1631 carp_multicast_cleanup(struct carp_softc *sc)
 1632 {
 1633         struct ip_moptions *imo = &sc->sc_imo;
 1634 #ifdef INET6
 1635         struct ip6_moptions *im6o = &sc->sc_im6o;
 1636 #endif
 1637         u_int16_t n = imo->imo_num_memberships;
 1638 
 1639         /* Clean up our own multicast memberships */
 1640         while (n-- > 0) {
 1641                 if (imo->imo_membership[n] != NULL) {
 1642                         in_delmulti(imo->imo_membership[n]);
 1643                         imo->imo_membership[n] = NULL;
 1644                 }
 1645         }
 1646         imo->imo_num_memberships = 0;
 1647         imo->imo_ifidx = 0;
 1648 
 1649 #ifdef INET6
 1650         while (!LIST_EMPTY(&im6o->im6o_memberships)) {
 1651                 struct in6_multi_mship *imm =
 1652                     LIST_FIRST(&im6o->im6o_memberships);
 1653 
 1654                 LIST_REMOVE(imm, i6mm_chain);
 1655                 in6_leavegroup(imm);
 1656         }
 1657         im6o->im6o_ifidx = 0;
 1658 #endif
 1659 
 1660         /* And any other multicast memberships */
 1661         carp_ether_purgemulti(sc);
 1662 }
 1663 
 1664 int
 1665 carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp0)
 1666 {
 1667         struct srpl *cif;
 1668         struct carp_softc *vr, *last = NULL, *after = NULL;
 1669         int myself = 0, error = 0;
 1670 
 1671         KASSERT(ifp0->if_index != sc->sc_carpdevidx);
 1672         KERNEL_ASSERT_LOCKED(); /* touching if_carp */
 1673 
 1674         if ((ifp0->if_flags & IFF_MULTICAST) == 0)
 1675                 return (EADDRNOTAVAIL);
 1676 
 1677         if (ifp0->if_type != IFT_ETHER)
 1678                 return (EINVAL);
 1679 
 1680         cif = &ifp0->if_carp;
 1681         if (carp_check_dup_vhids(sc, cif, NULL))
 1682                 return (EINVAL);
 1683 
 1684         if ((error = ifpromisc(ifp0, 1)))
 1685                 return (error);
 1686 
 1687         /* detach from old interface */
 1688         if (sc->sc_carpdevidx != 0)
 1689                 carpdetach(sc);
 1690 
 1691         /* attach carp interface to physical interface */
 1692         if_detachhook_add(ifp0, &sc->sc_dtask);
 1693         if_linkstatehook_add(ifp0, &sc->sc_ltask);
 1694 
 1695         sc->sc_carpdevidx = ifp0->if_index;
 1696         sc->sc_if.if_capabilities = ifp0->if_capabilities &
 1697             IFCAP_CSUM_MASK;
 1698 
 1699         SRPL_FOREACH_LOCKED(vr, cif, sc_list) {
 1700                 struct carp_vhost_entry *vrhead, *schead;
 1701                 last = vr;
 1702 
 1703                 if (vr == sc)
 1704                         myself = 1;
 1705 
 1706                 vrhead = SRPL_FIRST_LOCKED(&vr->carp_vhosts);
 1707                 schead = SRPL_FIRST_LOCKED(&sc->carp_vhosts);
 1708                 if (vrhead->vhid < schead->vhid)
 1709                         after = vr;
 1710         }
 1711 
 1712         if (!myself) {
 1713                 /* We're trying to keep things in order */
 1714                 if (last == NULL) {
 1715                         SRPL_INSERT_HEAD_LOCKED(&carp_sc_rc, cif,
 1716                             sc, sc_list);
 1717                 } else if (after == NULL) {
 1718                         SRPL_INSERT_AFTER_LOCKED(&carp_sc_rc, last,
 1719                             sc, sc_list);
 1720                 } else {
 1721                         SRPL_INSERT_AFTER_LOCKED(&carp_sc_rc, after,
 1722                             sc, sc_list);
 1723                 }
 1724         }
 1725         if (sc->sc_naddrs || sc->sc_naddrs6)
 1726                 sc->sc_if.if_flags |= IFF_UP;
 1727         carp_set_enaddr(sc);
 1728 
 1729         carp_carpdev_state(sc);
 1730 
 1731         return (0);
 1732 }
 1733 
 1734 void
 1735 carp_set_vhe_enaddr(struct carp_vhost_entry *vhe)
 1736 {
 1737         struct carp_softc *sc = vhe->parent_sc;
 1738 
 1739         if (vhe->vhid != 0 && sc->sc_carpdevidx != 0) {
 1740                 if (vhe->vhe_leader && sc->sc_balancing == CARP_BAL_IP)
 1741                         vhe->vhe_enaddr[0] = 1;
 1742                 else
 1743                         vhe->vhe_enaddr[0] = 0;
 1744                 vhe->vhe_enaddr[1] = 0;
 1745                 vhe->vhe_enaddr[2] = 0x5e;
 1746                 vhe->vhe_enaddr[3] = 0;
 1747                 vhe->vhe_enaddr[4] = 1;
 1748                 vhe->vhe_enaddr[5] = vhe->vhid;
 1749         } else
 1750                 memset(vhe->vhe_enaddr, 0, ETHER_ADDR_LEN);
 1751 }
 1752 
 1753 void
 1754 carp_set_enaddr(struct carp_softc *sc)
 1755 {
 1756         struct carp_vhost_entry *vhe;
 1757 
 1758         KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
 1759         SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries)
 1760                 carp_set_vhe_enaddr(vhe);
 1761 
 1762         vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts);
 1763 
 1764         /*
 1765          * Use the carp lladdr if the running one isn't manually set.
 1766          * Only compare static parts of the lladdr.
 1767          */
 1768         if ((memcmp(sc->sc_ac.ac_enaddr + 1, vhe->vhe_enaddr + 1,
 1769             ETHER_ADDR_LEN - 2) == 0) ||
 1770             (!sc->sc_ac.ac_enaddr[0] && !sc->sc_ac.ac_enaddr[1] &&
 1771             !sc->sc_ac.ac_enaddr[2] && !sc->sc_ac.ac_enaddr[3] &&
 1772             !sc->sc_ac.ac_enaddr[4] && !sc->sc_ac.ac_enaddr[5]))
 1773                 bcopy(vhe->vhe_enaddr, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN);
 1774 
 1775         /* Make sure the enaddr has changed before further twiddling. */
 1776         if (memcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0) {
 1777                 bcopy(sc->sc_ac.ac_enaddr, LLADDR(sc->sc_if.if_sadl),
 1778                     ETHER_ADDR_LEN);
 1779                 bcopy(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN);
 1780 #ifdef INET6
 1781                 /*
 1782                  * (re)attach a link-local address which matches
 1783                  * our new MAC address.
 1784                  */
 1785                 if (sc->sc_naddrs6)
 1786                         in6_ifattach_linklocal(&sc->sc_if, NULL);
 1787 #endif
 1788                 carp_set_state_all(sc, INIT);
 1789                 carp_setrun_all(sc, 0);
 1790         }
 1791 }
 1792 
 1793 void
 1794 carp_addr_updated(void *v)
 1795 {
 1796         struct carp_softc *sc = (struct carp_softc *) v;
 1797         struct ifaddr *ifa;
 1798         int new_naddrs = 0, new_naddrs6 = 0;
 1799 
 1800         TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
 1801                 if (ifa->ifa_addr->sa_family == AF_INET)
 1802                         new_naddrs++;
 1803 #ifdef INET6
 1804                 else if (ifa->ifa_addr->sa_family == AF_INET6)
 1805                         new_naddrs6++;
 1806 #endif /* INET6 */
 1807         }
 1808 
 1809         /* We received address changes from if_addrhooks callback */
 1810         if (new_naddrs != sc->sc_naddrs || new_naddrs6 != sc->sc_naddrs6) {
 1811 
 1812                 sc->sc_naddrs = new_naddrs;
 1813                 sc->sc_naddrs6 = new_naddrs6;
 1814 
 1815                 /* Re-establish multicast membership removed by in_control */
 1816                 if (IN_MULTICAST(sc->sc_peer.s_addr)) {
 1817                         if (!in_hasmulti(&sc->sc_peer, &sc->sc_if)) {
 1818                                 struct in_multi **imm =
 1819                                     sc->sc_imo.imo_membership;
 1820                                 u_int16_t maxmem =
 1821                                     sc->sc_imo.imo_max_memberships;
 1822 
 1823                                 memset(&sc->sc_imo, 0, sizeof(sc->sc_imo));
 1824                                 sc->sc_imo.imo_membership = imm;
 1825                                 sc->sc_imo.imo_max_memberships = maxmem;
 1826 
 1827                                 if (sc->sc_carpdevidx != 0 &&
 1828                                     sc->sc_naddrs > 0)
 1829                                         carp_join_multicast(sc);
 1830                         }
 1831                 }
 1832 
 1833                 if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) {
 1834                         sc->sc_if.if_flags &= ~IFF_UP;
 1835                         carp_set_state_all(sc, INIT);
 1836                 } else
 1837                         carp_hmac_prepare(sc);
 1838         }
 1839 
 1840         carp_setrun_all(sc, 0);
 1841 }
 1842 
 1843 int
 1844 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
 1845 {
 1846         struct in_addr *in = &sin->sin_addr;
 1847         int error;
 1848 
 1849         KASSERT(sc->sc_carpdevidx != 0);
 1850 
 1851         /* XXX is this necessary? */
 1852         if (in->s_addr == INADDR_ANY) {
 1853                 carp_setrun_all(sc, 0);
 1854                 return (0);
 1855         }
 1856 
 1857         if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0)
 1858                 return (error);
 1859 
 1860         carp_set_state_all(sc, INIT);
 1861 
 1862         return (0);
 1863 }
 1864 
 1865 int
 1866 carp_join_multicast(struct carp_softc *sc)
 1867 {
 1868         struct ip_moptions *imo = &sc->sc_imo;
 1869         struct in_multi *imm;
 1870         struct in_addr addr;
 1871 
 1872         if (!IN_MULTICAST(sc->sc_peer.s_addr))
 1873                 return (0);
 1874 
 1875         addr.s_addr = sc->sc_peer.s_addr;
 1876         if ((imm = in_addmulti(&addr, &sc->sc_if)) == NULL)
 1877                 return (ENOBUFS);
 1878 
 1879         imo->imo_membership[0] = imm;
 1880         imo->imo_num_memberships = 1;
 1881         imo->imo_ifidx = sc->sc_if.if_index;
 1882         imo->imo_ttl = CARP_DFLTTL;
 1883         imo->imo_loop = 0;
 1884         return (0);
 1885 }
 1886 
 1887 
 1888 #ifdef INET6
 1889 int
 1890 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
 1891 {
 1892         int error;
 1893 
 1894         KASSERT(sc->sc_carpdevidx != 0);
 1895 
 1896         if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 1897                 carp_setrun_all(sc, 0);
 1898                 return (0);
 1899         }
 1900 
 1901         if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0)
 1902                 return (error);
 1903 
 1904         carp_set_state_all(sc, INIT);
 1905 
 1906         return (0);
 1907 }
 1908 
 1909 int
 1910 carp_join_multicast6(struct carp_softc *sc)
 1911 {
 1912         struct in6_multi_mship *imm, *imm2;
 1913         struct ip6_moptions *im6o = &sc->sc_im6o;
 1914         struct sockaddr_in6 addr6;
 1915         int error;
 1916 
 1917         /* Join IPv6 CARP multicast group */
 1918         memset(&addr6, 0, sizeof(addr6));
 1919         addr6.sin6_family = AF_INET6;
 1920         addr6.sin6_len = sizeof(addr6);
 1921         addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
 1922         addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
 1923         addr6.sin6_addr.s6_addr8[15] = 0x12;
 1924         if ((imm = in6_joingroup(&sc->sc_if,
 1925             &addr6.sin6_addr, &error)) == NULL) {
 1926                 return (error);
 1927         }
 1928         /* join solicited multicast address */
 1929         memset(&addr6.sin6_addr, 0, sizeof(addr6.sin6_addr));
 1930         addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
 1931         addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
 1932         addr6.sin6_addr.s6_addr32[1] = 0;
 1933         addr6.sin6_addr.s6_addr32[2] = htonl(1);
 1934         addr6.sin6_addr.s6_addr32[3] = 0;
 1935         addr6.sin6_addr.s6_addr8[12] = 0xff;
 1936         if ((imm2 = in6_joingroup(&sc->sc_if,
 1937             &addr6.sin6_addr, &error)) == NULL) {
 1938                 in6_leavegroup(imm);
 1939                 return (error);
 1940         }
 1941 
 1942         /* apply v6 multicast membership */
 1943         im6o->im6o_ifidx = sc->sc_if.if_index;
 1944         if (imm)
 1945                 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm,
 1946                     i6mm_chain);
 1947         if (imm2)
 1948                 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2,
 1949                     i6mm_chain);
 1950 
 1951         return (0);
 1952 }
 1953 
 1954 #endif /* INET6 */
 1955 
 1956 int
 1957 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
 1958 {
 1959         struct proc *p = curproc;       /* XXX */
 1960         struct carp_softc *sc = ifp->if_softc;
 1961         struct carp_vhost_entry *vhe;
 1962         struct carpreq carpr;
 1963         struct ifaddr *ifa = (struct ifaddr *)addr;
 1964         struct ifreq *ifr = (struct ifreq *)addr;
 1965         struct ifnet *ifp0 = NULL;
 1966         int i, error = 0;
 1967 
 1968         switch (cmd) {
 1969         case SIOCSIFADDR:
 1970                 if (sc->sc_carpdevidx == 0)
 1971                         return (EINVAL);
 1972 
 1973                 switch (ifa->ifa_addr->sa_family) {
 1974                 case AF_INET:
 1975                         sc->sc_if.if_flags |= IFF_UP;
 1976                         error = carp_set_addr(sc, satosin(ifa->ifa_addr));
 1977                         break;
 1978 #ifdef INET6
 1979                 case AF_INET6:
 1980                         sc->sc_if.if_flags |= IFF_UP;
 1981                         error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
 1982                         break;
 1983 #endif /* INET6 */
 1984                 default:
 1985                         error = EAFNOSUPPORT;
 1986                         break;
 1987                 }
 1988                 break;
 1989 
 1990         case SIOCSIFFLAGS:
 1991                 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
 1992                 vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts);
 1993                 if (vhe->state != INIT && !(ifr->ifr_flags & IFF_UP)) {
 1994                         carp_del_all_timeouts(sc);
 1995 
 1996                         /* we need the interface up to bow out */
 1997                         sc->sc_if.if_flags |= IFF_UP;
 1998                         sc->sc_bow_out = 1;
 1999                         carp_vhe_send_ad_all(sc);
 2000                         sc->sc_bow_out = 0;
 2001 
 2002                         sc->sc_if.if_flags &= ~IFF_UP;
 2003                         carp_set_state_all(sc, INIT);
 2004                         carp_setrun_all(sc, 0);
 2005                 } else if (vhe->state == INIT && (ifr->ifr_flags & IFF_UP)) {
 2006                         sc->sc_if.if_flags |= IFF_UP;
 2007                         carp_setrun_all(sc, 0);
 2008                 }
 2009                 break;
 2010 
 2011         case SIOCSVH:
 2012                 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
 2013                 vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts);
 2014                 if ((error = suser(p)) != 0)
 2015                         break;
 2016                 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
 2017                         break;
 2018                 error = 1;
 2019                 if (carpr.carpr_carpdev[0] != '\0' &&
 2020                     (ifp0 = if_unit(carpr.carpr_carpdev)) == NULL)
 2021                         return (EINVAL);
 2022                 if (carpr.carpr_peer.s_addr == 0)
 2023                         sc->sc_peer.s_addr = INADDR_CARP_GROUP;
 2024                 else
 2025                         sc->sc_peer.s_addr = carpr.carpr_peer.s_addr;
 2026                 if (ifp0 != NULL && ifp0->if_index != sc->sc_carpdevidx) {
 2027                         if ((error = carp_set_ifp(sc, ifp0))) {
 2028                                 if_put(ifp0);
 2029                                 return (error);
 2030                         }
 2031                 }
 2032                 if_put(ifp0);
 2033                 if (vhe->state != INIT && carpr.carpr_state != vhe->state) {
 2034                         switch (carpr.carpr_state) {
 2035                         case BACKUP:
 2036                                 timeout_del(&vhe->ad_tmo);
 2037                                 carp_set_state_all(sc, BACKUP);
 2038                                 carp_setrun_all(sc, 0);
 2039                                 break;
 2040                         case MASTER:
 2041                                 KERNEL_ASSERT_LOCKED();
 2042                                 /* touching carp_vhosts */
 2043                                 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts,
 2044                                     vhost_entries)
 2045                                         carp_master_down(vhe);
 2046                                 break;
 2047                         default:
 2048                                 break;
 2049                         }
 2050                 }
 2051                 if ((error = carp_vhids_ioctl(sc, &carpr)))
 2052                         return (error);
 2053                 if (carpr.carpr_advbase >= 0) {
 2054                         if (carpr.carpr_advbase > 255) {
 2055                                 error = EINVAL;
 2056                                 break;
 2057                         }
 2058                         sc->sc_advbase = carpr.carpr_advbase;
 2059                         error--;
 2060                 }
 2061                 if (memcmp(sc->sc_advskews, carpr.carpr_advskews,
 2062                     sizeof(sc->sc_advskews))) {
 2063                         i = 0;
 2064                         KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
 2065                         SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts,
 2066                             vhost_entries)
 2067                                 vhe->advskew = carpr.carpr_advskews[i++];
 2068                         bcopy(carpr.carpr_advskews, sc->sc_advskews,
 2069                             sizeof(sc->sc_advskews));
 2070                 }
 2071                 if (sc->sc_balancing != carpr.carpr_balancing) {
 2072                         if (carpr.carpr_balancing > CARP_BAL_MAXID) {
 2073                                 error = EINVAL;
 2074                                 break;
 2075                         }
 2076                         sc->sc_balancing = carpr.carpr_balancing;
 2077                         carp_set_enaddr(sc);
 2078                         carp_update_lsmask(sc);
 2079                 }
 2080                 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
 2081                 if (error > 0)
 2082                         error = EINVAL;
 2083                 else {
 2084                         error = 0;
 2085                         carp_hmac_prepare(sc);
 2086                         carp_setrun_all(sc, 0);
 2087                 }
 2088                 break;
 2089 
 2090         case SIOCGVH:
 2091                 memset(&carpr, 0, sizeof(carpr));
 2092                 if ((ifp0 = if_get(sc->sc_carpdevidx)) != NULL)
 2093                         strlcpy(carpr.carpr_carpdev, ifp0->if_xname, IFNAMSIZ);
 2094                 if_put(ifp0);
 2095                 i = 0;
 2096                 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
 2097                 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
 2098                         carpr.carpr_vhids[i] = vhe->vhid;
 2099                         carpr.carpr_advskews[i] = vhe->advskew;
 2100                         carpr.carpr_states[i] = vhe->state;
 2101                         i++;
 2102                 }
 2103                 carpr.carpr_advbase = sc->sc_advbase;
 2104                 carpr.carpr_balancing = sc->sc_balancing;
 2105                 if (suser(p) == 0)
 2106                         bcopy(sc->sc_key, carpr.carpr_key,
 2107                             sizeof(carpr.carpr_key));
 2108                 carpr.carpr_peer.s_addr = sc->sc_peer.s_addr;
 2109                 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
 2110                 break;
 2111 
 2112         case SIOCADDMULTI:
 2113                 error = carp_ether_addmulti(sc, ifr);
 2114                 break;
 2115 
 2116         case SIOCDELMULTI:
 2117                 error = carp_ether_delmulti(sc, ifr);
 2118                 break;
 2119         case SIOCAIFGROUP:
 2120         case SIOCDIFGROUP:
 2121                 if (sc->sc_demote_cnt)
 2122                         carp_ifgroup_ioctl(ifp, cmd, addr);
 2123                 break;
 2124         case SIOCSIFGATTR:
 2125                 carp_ifgattr_ioctl(ifp, cmd, addr);
 2126                 break;
 2127         default:
 2128                 error = ENOTTY;
 2129         }
 2130 
 2131         if (memcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0)
 2132                 carp_set_enaddr(sc);
 2133         return (error);
 2134 }
 2135 
 2136 int
 2137 carp_check_dup_vhids(struct carp_softc *sc, struct srpl *cif,
 2138     struct carpreq *carpr)
 2139 {
 2140         struct carp_softc *vr;
 2141         struct carp_vhost_entry *vhe, *vhe0;
 2142         int i;
 2143 
 2144         KERNEL_ASSERT_LOCKED(); /* touching if_carp + carp_vhosts */
 2145 
 2146         SRPL_FOREACH_LOCKED(vr, cif, sc_list) {
 2147                 if (vr == sc)
 2148                         continue;
 2149                 SRPL_FOREACH_LOCKED(vhe, &vr->carp_vhosts, vhost_entries) {
 2150                         if (carpr) {
 2151                                 for (i = 0; carpr->carpr_vhids[i]; i++) {
 2152                                         if (vhe->vhid == carpr->carpr_vhids[i])
 2153                                                 return (EINVAL);
 2154                                 }
 2155                         }
 2156                         SRPL_FOREACH_LOCKED(vhe0, &sc->carp_vhosts,
 2157                             vhost_entries) {
 2158                                 if (vhe->vhid == vhe0->vhid)
 2159                                         return (EINVAL);
 2160                         }
 2161                 }
 2162         }
 2163         return (0);
 2164 }
 2165 
 2166 int
 2167 carp_vhids_ioctl(struct carp_softc *sc, struct carpreq *carpr)
 2168 {
 2169         int i, j;
 2170         u_int8_t taken_vhids[256];
 2171 
 2172         if (carpr->carpr_vhids[0] == 0 ||
 2173             !memcmp(sc->sc_vhids, carpr->carpr_vhids, sizeof(sc->sc_vhids)))
 2174                 return (0);
 2175 
 2176         memset(taken_vhids, 0, sizeof(taken_vhids));
 2177         for (i = 0; carpr->carpr_vhids[i]; i++) {
 2178                 struct ifnet *ifp;
 2179 
 2180                 if (taken_vhids[carpr->carpr_vhids[i]])
 2181                         return (EINVAL);
 2182                 taken_vhids[carpr->carpr_vhids[i]] = 1;
 2183 
 2184                 if ((ifp = if_get(sc->sc_carpdevidx)) != NULL) {
 2185                         struct srpl *cif;
 2186                         cif = &ifp->if_carp;
 2187                         if (carp_check_dup_vhids(sc, cif, carpr)) {
 2188                                 if_put(ifp);
 2189                                 return (EINVAL);
 2190                         }
 2191                 }
 2192                 if_put(ifp);
 2193                 if (carpr->carpr_advskews[i] >= 255)
 2194                         return (EINVAL);
 2195         }
 2196         /* set sane balancing defaults */
 2197         if (i <= 1)
 2198                 carpr->carpr_balancing = CARP_BAL_NONE;
 2199         else if (carpr->carpr_balancing == CARP_BAL_NONE &&
 2200             sc->sc_balancing == CARP_BAL_NONE)
 2201                 carpr->carpr_balancing = CARP_BAL_IP;
 2202 
 2203         /* destroy all */
 2204         carp_del_all_timeouts(sc);
 2205         carp_destroy_vhosts(sc);
 2206         memset(sc->sc_vhids, 0, sizeof(sc->sc_vhids));
 2207 
 2208         /* sort vhosts list by vhid */
 2209         for (j = 1; j <= 255; j++) {
 2210                 for (i = 0; carpr->carpr_vhids[i]; i++) {
 2211                         if (carpr->carpr_vhids[i] != j)
 2212                                 continue;
 2213                         if (carp_new_vhost(sc, carpr->carpr_vhids[i],
 2214                             carpr->carpr_advskews[i]))
 2215                                 return (ENOMEM);
 2216                         sc->sc_vhids[i] = carpr->carpr_vhids[i];
 2217                         sc->sc_advskews[i] = carpr->carpr_advskews[i];
 2218                 }
 2219         }
 2220         carp_set_enaddr(sc);
 2221         carp_set_state_all(sc, INIT);
 2222         return (0);
 2223 }
 2224 
 2225 void
 2226 carp_ifgroup_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
 2227 {
 2228         struct ifgroupreq *ifgr = (struct ifgroupreq *)addr;
 2229         struct ifg_list *ifgl;
 2230         int *dm, adj;
 2231 
 2232         if (!strcmp(ifgr->ifgr_group, IFG_ALL))
 2233                 return;
 2234         adj = ((struct carp_softc *)ifp->if_softc)->sc_demote_cnt;
 2235         if (cmd == SIOCDIFGROUP)
 2236                 adj = adj * -1;
 2237 
 2238         TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
 2239                 if (!strcmp(ifgl->ifgl_group->ifg_group, ifgr->ifgr_group)) {
 2240                         dm = &ifgl->ifgl_group->ifg_carp_demoted;
 2241                         if (*dm + adj >= 0)
 2242                                 *dm += adj;
 2243                         else
 2244                                 *dm = 0;
 2245                 }
 2246 }
 2247 
 2248 void
 2249 carp_ifgattr_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
 2250 {
 2251         struct ifgroupreq *ifgr = (struct ifgroupreq *)addr;
 2252         struct carp_softc *sc = ifp->if_softc;
 2253 
 2254         if (ifgr->ifgr_attrib.ifg_carp_demoted > 0 && (sc->sc_if.if_flags &
 2255             (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING))
 2256                 carp_vhe_send_ad_all(sc);
 2257 }
 2258 
 2259 void
 2260 carp_start(struct ifnet *ifp)
 2261 {
 2262         struct carp_softc *sc = ifp->if_softc;
 2263         struct ifnet *ifp0;
 2264         struct mbuf *m;
 2265 
 2266         if ((ifp0 = if_get(sc->sc_carpdevidx)) == NULL) {
 2267                 ifq_purge(&ifp->if_snd);
 2268                 return;
 2269         }
 2270 
 2271         while ((m = ifq_dequeue(&ifp->if_snd)) != NULL)
 2272                 carp_transmit(sc, ifp0, m);
 2273         if_put(ifp0);
 2274 }
 2275 
 2276 void
 2277 carp_transmit(struct carp_softc *sc, struct ifnet *ifp0, struct mbuf *m)
 2278 {
 2279         struct ifnet *ifp = &sc->sc_if;
 2280 
 2281 #if NBPFILTER > 0
 2282         {
 2283                 caddr_t if_bpf = ifp->if_bpf;
 2284                 if (if_bpf)
 2285                         bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT);
 2286         }
 2287 #endif /* NBPFILTER > 0 */
 2288 
 2289         if (!ISSET(ifp0->if_flags, IFF_RUNNING)) {
 2290                 counters_inc(ifp->if_counters, ifc_oerrors);
 2291                 m_freem(m);
 2292                 return;
 2293         }
 2294 
 2295         /*
 2296          * Do not leak the multicast address when sending
 2297          * advertisements in 'ip' and 'ip-stealth' balancing
 2298          * modes.
 2299          */
 2300         if (sc->sc_balancing == CARP_BAL_IP ||
 2301             sc->sc_balancing == CARP_BAL_IPSTEALTH) {
 2302                 struct ether_header *eh = mtod(m, struct ether_header *);
 2303                 memcpy(eh->ether_shost, sc->sc_ac.ac_enaddr,
 2304                     sizeof(eh->ether_shost));
 2305         }
 2306 
 2307         if (if_enqueue(ifp0, m))
 2308                 counters_inc(ifp->if_counters, ifc_oerrors);
 2309 }
 2310 
 2311 int
 2312 carp_enqueue(struct ifnet *ifp, struct mbuf *m)
 2313 {
 2314         struct carp_softc *sc = ifp->if_softc;
 2315         struct ifnet *ifp0;
 2316 
 2317         /* no ifq_is_priq, cos hfsc on carp doesn't make sense */
 2318 
 2319         /*
 2320          * If the parent of this carp(4) got destroyed while
 2321          * `m' was being processed, silently drop it.
 2322          */
 2323         if ((ifp0 = if_get(sc->sc_carpdevidx)) == NULL) {
 2324                 m_freem(m);
 2325                 return (0);
 2326         }
 2327 
 2328         counters_pkt(ifp->if_counters,
 2329             ifc_opackets, ifc_obytes, m->m_pkthdr.len);
 2330         carp_transmit(sc, ifp0, m);
 2331         if_put(ifp0);
 2332 
 2333         return (0);
 2334 }
 2335 
 2336 int
 2337 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
 2338     struct rtentry *rt)
 2339 {
 2340         struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc);
 2341         struct carp_vhost_entry *vhe;
 2342         struct srp_ref sr;
 2343         int ismaster;
 2344 
 2345         if (sc->cur_vhe == NULL) {
 2346                 vhe = SRPL_FIRST(&sr, &sc->carp_vhosts);
 2347                 ismaster = (vhe->state == MASTER);
 2348                 SRPL_LEAVE(&sr);
 2349         } else {
 2350                 ismaster = (sc->cur_vhe->state == MASTER);
 2351         }
 2352 
 2353         if ((sc->sc_balancing == CARP_BAL_NONE && !ismaster)) {
 2354                 m_freem(m);
 2355                 return (ENETUNREACH);
 2356         }
 2357 
 2358         return (ether_output(ifp, m, sa, rt));
 2359 }
 2360 
 2361 void
 2362 carp_set_state_all(struct carp_softc *sc, int state)
 2363 {
 2364         struct carp_vhost_entry *vhe;
 2365 
 2366         KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
 2367 
 2368         SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
 2369                 if (vhe->state == state)
 2370                         continue;
 2371 
 2372                 carp_set_state(vhe, state);
 2373         }
 2374 }
 2375 
 2376 void
 2377 carp_set_state(struct carp_vhost_entry *vhe, int state)
 2378 {
 2379         struct carp_softc *sc = vhe->parent_sc;
 2380         static const char *carp_states[] = { CARP_STATES };
 2381         int loglevel;
 2382         struct carp_vhost_entry *vhe0;
 2383 
 2384         KASSERT(vhe->state != state);
 2385 
 2386         if (vhe->state == INIT || state == INIT)
 2387                 loglevel = LOG_WARNING;
 2388         else
 2389                 loglevel = LOG_CRIT;
 2390 
 2391         if (sc->sc_vhe_count > 1)
 2392                 CARP_LOG(loglevel, sc,
 2393                     ("state transition (vhid %d): %s -> %s", vhe->vhid,
 2394                     carp_states[vhe->state], carp_states[state]));
 2395         else
 2396                 CARP_LOG(loglevel, sc,
 2397                     ("state transition: %s -> %s",
 2398                     carp_states[vhe->state], carp_states[state]));
 2399 
 2400         vhe->state = state;
 2401         carp_update_lsmask(sc);
 2402 
 2403         KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
 2404 
 2405         sc->sc_if.if_link_state = LINK_STATE_INVALID;
 2406         SRPL_FOREACH_LOCKED(vhe0, &sc->carp_vhosts, vhost_entries) {
 2407                 /*
 2408                  * Link must be up if at least one vhe is in state MASTER to
 2409                  * bring or keep route up.
 2410                  */
 2411                 if (vhe0->state == MASTER) {
 2412                         sc->sc_if.if_link_state = LINK_STATE_UP;
 2413                         break;
 2414                 } else if (vhe0->state == BACKUP) {
 2415                         sc->sc_if.if_link_state = LINK_STATE_DOWN;
 2416                 }
 2417         }
 2418         if_link_state_change(&sc->sc_if);
 2419 }
 2420 
 2421 void
 2422 carp_group_demote_adj(struct ifnet *ifp, int adj, char *reason)
 2423 {
 2424         struct ifg_list *ifgl;
 2425         int *dm, need_ad;
 2426         struct carp_softc *nil = NULL;
 2427 
 2428         if (ifp->if_type == IFT_CARP) {
 2429                 dm = &((struct carp_softc *)ifp->if_softc)->sc_demote_cnt;
 2430                 if (*dm + adj >= 0)
 2431                         *dm += adj;
 2432                 else
 2433                         *dm = 0;
 2434         }
 2435 
 2436         need_ad = 0;
 2437         TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
 2438                 if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL))
 2439                         continue;
 2440                 dm = &ifgl->ifgl_group->ifg_carp_demoted;
 2441 
 2442                 if (*dm + adj >= 0)
 2443                         *dm += adj;
 2444                 else
 2445                         *dm = 0;
 2446 
 2447                 if (adj > 0 && *dm == 1)
 2448                         need_ad = 1;
 2449                 CARP_LOG(LOG_ERR, nil,
 2450                     ("%s demoted group %s by %d to %d (%s)",
 2451                     ifp->if_xname, ifgl->ifgl_group->ifg_group,
 2452                     adj, *dm, reason));
 2453         }
 2454         if (need_ad)
 2455                 carp_send_ad_all();
 2456 }
 2457 
 2458 int
 2459 carp_group_demote_count(struct carp_softc *sc)
 2460 {
 2461         struct ifg_list *ifgl;
 2462         int count = 0;
 2463 
 2464         TAILQ_FOREACH(ifgl, &sc->sc_if.if_groups, ifgl_next)
 2465                 count += ifgl->ifgl_group->ifg_carp_demoted;
 2466 
 2467         if (count == 0 && sc->sc_demote_cnt)
 2468                 count = sc->sc_demote_cnt;
 2469 
 2470         return (count > 255 ? 255 : count);
 2471 }
 2472 
 2473 void
 2474 carp_carpdev_state(void *v)
 2475 {
 2476         struct carp_softc *sc = v;
 2477         struct ifnet *ifp0;
 2478         int suppressed = sc->sc_suppress;
 2479 
 2480         if ((ifp0 = if_get(sc->sc_carpdevidx)) == NULL)
 2481                 return;
 2482 
 2483         if (ifp0->if_link_state == LINK_STATE_DOWN ||
 2484             !(ifp0->if_flags & IFF_UP)) {
 2485                 sc->sc_if.if_flags &= ~IFF_RUNNING;
 2486                 carp_del_all_timeouts(sc);
 2487                 carp_set_state_all(sc, INIT);
 2488                 sc->sc_suppress = 1;
 2489                 carp_setrun_all(sc, 0);
 2490                 if (!suppressed)
 2491                         carp_group_demote_adj(&sc->sc_if, 1, "carpdev");
 2492         } else if (suppressed) {
 2493                 carp_set_state_all(sc, INIT);
 2494                 sc->sc_suppress = 0;
 2495                 carp_setrun_all(sc, 0);
 2496                 carp_group_demote_adj(&sc->sc_if, -1, "carpdev");
 2497         }
 2498 
 2499         if_put(ifp0);
 2500 }
 2501 
 2502 int
 2503 carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr)
 2504 {
 2505         struct ifnet *ifp0;
 2506         struct carp_mc_entry *mc;
 2507         u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
 2508         int error;
 2509 
 2510         ifp0 = if_get(sc->sc_carpdevidx);
 2511         if (ifp0 == NULL)
 2512                 return (EINVAL);
 2513 
 2514         error = ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac);
 2515         if (error != ENETRESET) {
 2516                 if_put(ifp0);
 2517                 return (error);
 2518         }
 2519 
 2520         /*
 2521          * This is new multicast address.  We have to tell parent
 2522          * about it.  Also, remember this multicast address so that
 2523          * we can delete them on unconfigure.
 2524          */
 2525         mc = malloc(sizeof(*mc), M_DEVBUF, M_NOWAIT);
 2526         if (mc == NULL) {
 2527                 error = ENOMEM;
 2528                 goto alloc_failed;
 2529         }
 2530 
 2531         /*
 2532          * As ether_addmulti() returns ENETRESET, following two
 2533          * statement shouldn't fail.
 2534          */
 2535         (void)ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi);
 2536         ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, mc->mc_enm);
 2537         memcpy(&mc->mc_addr, &ifr->ifr_addr, ifr->ifr_addr.sa_len);
 2538         LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries);
 2539 
 2540         error = (*ifp0->if_ioctl)(ifp0, SIOCADDMULTI, (caddr_t)ifr);
 2541         if (error != 0)
 2542                 goto ioctl_failed;
 2543 
 2544         if_put(ifp0);
 2545 
 2546         return (error);
 2547 
 2548  ioctl_failed:
 2549         LIST_REMOVE(mc, mc_entries);
 2550         free(mc, M_DEVBUF, sizeof(*mc));
 2551  alloc_failed:
 2552         (void)ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac);
 2553         if_put(ifp0);
 2554 
 2555         return (error);
 2556 }
 2557 
 2558 int
 2559 carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr)
 2560 {
 2561         struct ifnet *ifp0;
 2562         struct ether_multi *enm;
 2563         struct carp_mc_entry *mc;
 2564         u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
 2565         int error;
 2566 
 2567         ifp0 = if_get(sc->sc_carpdevidx);
 2568         if (ifp0 == NULL)
 2569                 return (EINVAL);
 2570 
 2571         /*
 2572          * Find a key to lookup carp_mc_entry.  We have to do this
 2573          * before calling ether_delmulti for obvious reason.
 2574          */
 2575         if ((error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi)) != 0)
 2576                 goto rele;
 2577         ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, enm);
 2578         if (enm == NULL) {
 2579                 error = EINVAL;
 2580                 goto rele;
 2581         }
 2582 
 2583         LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries)
 2584                 if (mc->mc_enm == enm)
 2585                         break;
 2586 
 2587         /* We won't delete entries we didn't add */
 2588         if (mc == NULL) {
 2589                 error = EINVAL;
 2590                 goto rele;
 2591         }
 2592 
 2593         error = ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac);
 2594         if (error != ENETRESET)
 2595                 goto rele;
 2596 
 2597         /* We no longer use this multicast address.  Tell parent so. */
 2598         error = (*ifp0->if_ioctl)(ifp0, SIOCDELMULTI, (caddr_t)ifr);
 2599         if (error == 0) {
 2600                 /* And forget about this address. */
 2601                 LIST_REMOVE(mc, mc_entries);
 2602                 free(mc, M_DEVBUF, sizeof(*mc));
 2603         } else
 2604                 (void)ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac);
 2605 rele:
 2606         if_put(ifp0);
 2607         return (error);
 2608 }
 2609 
 2610 /*
 2611  * Delete any multicast address we have asked to add from parent
 2612  * interface.  Called when the carp is being unconfigured.
 2613  */
 2614 void
 2615 carp_ether_purgemulti(struct carp_softc *sc)
 2616 {
 2617         struct ifnet *ifp0;             /* Parent. */
 2618         struct carp_mc_entry *mc;
 2619         union {
 2620                 struct ifreq ifreq;
 2621                 struct {
 2622                         char ifr_name[IFNAMSIZ];
 2623                         struct sockaddr_storage ifr_ss;
 2624                 } ifreq_storage;
 2625         } u;
 2626         struct ifreq *ifr = &u.ifreq;
 2627 
 2628         if ((ifp0 = if_get(sc->sc_carpdevidx)) == NULL)
 2629                 return;
 2630 
 2631         memcpy(ifr->ifr_name, ifp0->if_xname, IFNAMSIZ);
 2632         while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) {
 2633                 memcpy(&ifr->ifr_addr, &mc->mc_addr, mc->mc_addr.ss_len);
 2634                 (void)(*ifp0->if_ioctl)(ifp0, SIOCDELMULTI, (caddr_t)ifr);
 2635                 LIST_REMOVE(mc, mc_entries);
 2636                 free(mc, M_DEVBUF, sizeof(*mc));
 2637         }
 2638 
 2639         if_put(ifp0);
 2640 }
 2641 
 2642 void
 2643 carp_vh_ref(void *null, void *v)
 2644 {
 2645         struct carp_vhost_entry *vhe = v;
 2646 
 2647         refcnt_take(&vhe->vhost_refcnt);
 2648 }
 2649 
 2650 void
 2651 carp_vh_unref(void *null, void *v)
 2652 {
 2653         struct carp_vhost_entry *vhe = v;
 2654 
 2655         if (refcnt_rele(&vhe->vhost_refcnt)) {
 2656                 carp_sc_unref(NULL, vhe->parent_sc);
 2657                 free(vhe, M_DEVBUF, sizeof(*vhe));
 2658         }
 2659 }
 2660 
 2661 void
 2662 carp_sc_ref(void *null, void *s)
 2663 {
 2664         struct carp_softc *sc = s;
 2665 
 2666         refcnt_take(&sc->sc_refcnt);
 2667 }
 2668 
 2669 void
 2670 carp_sc_unref(void *null, void *s)
 2671 {
 2672         struct carp_softc *sc = s;
 2673 
 2674         refcnt_rele_wake(&sc->sc_refcnt);
 2675 }

Cache object: 64bb0717006b39dab63d76b959ce78a6


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.