The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/ip_carp.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: ip_carp.c,v 1.117 2022/09/02 23:48:11 thorpej Exp $    */
    2 /*      $OpenBSD: ip_carp.c,v 1.113 2005/11/04 08:11:54 mcbride Exp $   */
    3 
    4 /*
    5  * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
    6  * Copyright (c) 2003 Ryan McBride. All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   20  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
   21  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
   22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
   23  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
   25  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
   26  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
   27  * THE POSSIBILITY OF SUCH DAMAGE.
   28  */
   29 
   30 #ifdef _KERNEL_OPT
   31 #include "opt_inet.h"
   32 #include "opt_mbuftrace.h"
   33 #endif
   34 
   35 #include <sys/cdefs.h>
   36 __KERNEL_RCSID(0, "$NetBSD: ip_carp.c,v 1.117 2022/09/02 23:48:11 thorpej Exp $");
   37 
   38 /*
   39  * TODO:
   40  *      - iface reconfigure
   41  *      - support for hardware checksum calculations;
   42  *
   43  */
   44 
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/callout.h>
#include <sys/ioctl.h>
#include <sys/errno.h>
#include <sys/device.h>
#include <sys/time.h>
#include <sys/kernel.h>
#include <sys/kauth.h>
#include <sys/sysctl.h>
#include <sys/ucred.h>
#include <sys/syslog.h>
#include <sys/acct.h>
#include <sys/cprng.h>
#include <sys/cpu.h>
#include <sys/pserialize.h>
#include <sys/psref.h>
   65 
   66 #include <net/if.h>
   67 #include <net/pfil.h>
   68 #include <net/if_types.h>
   69 #include <net/if_ether.h>
   70 #include <net/route.h>
   71 #include <net/net_stats.h>
   72 #include <netinet/if_inarp.h>
   73 #include <netinet/wqinput.h>
   74 
   75 #ifdef INET
   76 #include <netinet/in.h>
   77 #include <netinet/in_systm.h>
   78 #include <netinet/in_var.h>
   79 #include <netinet/ip.h>
   80 #include <netinet/ip_var.h>
   81 
   82 #include <net/if_dl.h>
   83 #endif
   84 
   85 #ifdef INET6
   86 #include <netinet/icmp6.h>
   87 #include <netinet/ip6.h>
   88 #include <netinet6/ip6_var.h>
   89 #include <netinet6/nd6.h>
   90 #include <netinet6/scope6_var.h>
   91 #include <netinet6/in6_var.h>
   92 #endif
   93 
   94 #include <net/bpf.h>
   95 
   96 #include <sys/sha1.h>
   97 
   98 #include <netinet/ip_carp.h>
   99 
  100 #include "ioconf.h"
  101 
/*
 * One element of a carp_softc's carp_mc_listhead list.
 */
struct carp_mc_entry {
        LIST_ENTRY(carp_mc_entry)       mc_entries;     /* list linkage */
        union {
                struct ether_multi      *mcu_enm;
        } mc_u;
        /* NOTE(review): presumably the multicast address this entry tracks */
        struct sockaddr_storage         mc_addr;
};
#define mc_enm  mc_u.mcu_enm    /* shorthand for the only union member */
  110 
/*
 * Per-interface state of one carp virtual host.
 */
struct carp_softc {
        struct ethercom sc_ac;
        /* shorthands for the embedded ifnet and the device it is attached to */
#define sc_if           sc_ac.ec_if
#define sc_carpdev      sc_ac.ec_if.if_carpdev
        void *sc_linkstate_hook;
        int ah_cookie;
        int lh_cookie;
        struct ip_moptions sc_imo;
#ifdef INET6
        struct ip6_moptions sc_im6o;
#endif /* INET6 */
        TAILQ_ENTRY(carp_softc) sc_list;        /* linkage on carp_if.vhif_vrs */

        enum { INIT = 0, BACKUP, MASTER }       sc_state;

        int sc_suppress;
        int sc_bow_out;

        int sc_sendad_errors;
#define CARP_SENDAD_MAX_ERRORS  3
        int sc_sendad_success;
#define CARP_SENDAD_MIN_SUCCESS 3

        int sc_vhid;            /* matched against carp_vhid of received ads */
        int sc_advskew;         /* in 1/256ths of a second */
        int sc_naddrs;
        int sc_naddrs6;
        int sc_advbase;         /* seconds */
        int sc_init_counter;    /* cleared once an advertisement is accepted */
        u_int64_t sc_counter;   /* counter taken from the last accepted ad */

        /* authentication */
#define CARP_HMAC_PAD   64
        unsigned char sc_key[CARP_KEY_LEN];
        /* key XOR ipad while preparing, key XOR opad afterwards */
        unsigned char sc_pad[CARP_HMAC_PAD];
        SHA1_CTX sc_sha1;       /* precomputed start of the inner hash */
        u_int32_t sc_hashkey[2];        /* key for the arpbalance hash */

        struct callout sc_ad_tmo;       /* advertisement timeout */
        struct callout sc_md_tmo;       /* master down timeout */
        /* NOTE(review): presumably the IPv6 counterpart of sc_md_tmo - confirm */
        struct callout sc_md6_tmo;      /* master down timeout */

        LIST_HEAD(__carp_mchead, carp_mc_entry) carp_mc_listhead;
};
  155 
/*
 * When non-zero, advertisement-skew comparisons on input use 240 in
 * place of any sc_advskew that is below 240.
 */
int carp_suppress_preempt = 0;
/* Tunables indexed by the CARPCTL_* constants (e.g. CARPCTL_ALLOW). */
static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 0, 0 };        /* XXX for now */

/* Per-CPU statistics counters, bumped through CARP_STATINC(). */
static percpu_t *carpstat_percpu;

#define CARP_STATINC(x)         _NET_STATINC(carpstat_percpu, x)

#ifdef MBUFTRACE
/* mbuf owners used to tag CARP traffic when mbuf tracing is compiled in */
static struct mowner carp_proto_mowner_rx = MOWNER_INIT("carp", "rx");
static struct mowner carp_proto_mowner_tx = MOWNER_INIT("carp", "tx");
static struct mowner carp_proto6_mowner_rx = MOWNER_INIT("carp6", "rx");
static struct mowner carp_proto6_mowner_tx = MOWNER_INIT("carp6", "tx");
#endif
  169 
/*
 * CARP state hung off an interface's if_carp pointer: the list of
 * carp_softc instances (linked through sc_list) attached to it.
 */
struct carp_if {
        TAILQ_HEAD(, carp_softc) vhif_vrs;
        int vhif_nvrs;          /* NOTE(review): presumably length of vhif_vrs */

        struct ifnet *vhif_ifp; /* NOTE(review): presumably the owning interface */
};
  176 
  177 #define CARP_LOG(sc, s)                                                 \
  178         if (carp_opts[CARPCTL_LOG]) {                                   \
  179                 if (sc)                                                 \
  180                         log(LOG_INFO, "%s: ",                           \
  181                             (sc)->sc_if.if_xname);                      \
  182                 else                                                    \
  183                         log(LOG_INFO, "carp: ");                        \
  184                 addlog s;                                               \
  185                 addlog("\n");                                           \
  186         }
  187 
/* Forward declarations of the file-local functions. */
static void     carp_hmac_prepare(struct carp_softc *);
static void     carp_hmac_generate(struct carp_softc *, u_int32_t *,
                    unsigned char *);
static int      carp_hmac_verify(struct carp_softc *, u_int32_t *,
                    unsigned char *);
static void     carp_setroute(struct carp_softc *, int);
static void     carp_proto_input_c(struct mbuf *, struct carp_header *,
                    sa_family_t);
static void     carpdetach(struct carp_softc *);
static void     carp_prepare_ad(struct mbuf *, struct carp_softc *,
                    struct carp_header *);
static void     carp_send_ad_all(void);
static void     carp_send_ad(void *);
static void     carp_send_arp(struct carp_softc *);
static void     carp_master_down(void *);
static int      carp_ioctl(struct ifnet *, u_long, void *);
static void     carp_start(struct ifnet *);
static void     carp_setrun(struct carp_softc *, sa_family_t);
static void     carp_set_state(struct carp_softc *, int);
static int      carp_addrcount(struct carp_if *, struct in_ifaddr *, int);
/* selector passed as the last argument of carp_addrcount() */
enum    { CARP_COUNT_MASTER, CARP_COUNT_RUNNING };

static void     carp_multicast_cleanup(struct carp_softc *);
static int      carp_set_ifp(struct carp_softc *, struct ifnet *);
static void     carp_set_enaddr(struct carp_softc *);
#if 0
static void     carp_addr_updated(void *);
#endif
static u_int32_t        carp_hash(struct carp_softc *, u_char *);
static int      carp_set_addr(struct carp_softc *, struct sockaddr_in *);
static int      carp_join_multicast(struct carp_softc *);
#ifdef INET6
static void     carp_send_na(struct carp_softc *);
static int      carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
static int      carp_join_multicast6(struct carp_softc *);
#endif
static int      carp_clone_create(struct if_clone *, int);
static int      carp_clone_destroy(struct ifnet *);
static int      carp_ether_addmulti(struct carp_softc *, struct ifreq *);
static int      carp_ether_delmulti(struct carp_softc *, struct ifreq *);
static void     carp_ether_purgemulti(struct carp_softc *);
static void     carp_update_link_state(struct carp_softc *sc);

static void     sysctl_net_inet_carp_setup(struct sysctllog **);

/*
 * workqueue-based pr_input: the public carp*_proto_input() entry points
 * hand each packet to these queues via wqinput_input(); the
 * underscore-prefixed functions do the actual processing.
 */
static struct wqinput *carp_wqinput;
static void _carp_proto_input(struct mbuf *, int, int);
#ifdef INET6
static struct wqinput *carp6_wqinput;
static void _carp6_proto_input(struct mbuf *, int, int);
#endif

/* Interface cloner for "carp" pseudo-interfaces. */
struct if_clone carp_cloner =
    IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy);
  243 
  244 static __inline u_int16_t
  245 carp_cksum(struct mbuf *m, int len)
  246 {
  247         return (in_cksum(m, len));
  248 }
  249 
  250 #ifdef INET6
  251 static __inline u_int16_t
  252 carp6_cksum(struct mbuf *m, uint32_t off, uint32_t len)
  253 {
  254         return (in6_cksum(m, IPPROTO_CARP, off, len));
  255 }
  256 #endif
  257 
  258 static void
  259 carp_hmac_prepare(struct carp_softc *sc)
  260 {
  261         u_int8_t carp_version = CARP_VERSION, type = CARP_ADVERTISEMENT;
  262         u_int8_t vhid = sc->sc_vhid & 0xff;
  263         SHA1_CTX sha1ctx;
  264         u_int32_t kmd[5];
  265         struct ifaddr *ifa;
  266         int i, found;
  267         struct in_addr last, cur, in;
  268 #ifdef INET6
  269         struct in6_addr last6, cur6, in6;
  270 #endif /* INET6 */
  271 
  272         /* compute ipad from key */
  273         memset(sc->sc_pad, 0, sizeof(sc->sc_pad));
  274         memcpy(sc->sc_pad, sc->sc_key, sizeof(sc->sc_key));
  275         for (i = 0; i < sizeof(sc->sc_pad); i++)
  276                 sc->sc_pad[i] ^= 0x36;
  277 
  278         /* precompute first part of inner hash */
  279         SHA1Init(&sc->sc_sha1);
  280         SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
  281         SHA1Update(&sc->sc_sha1, (void *)&carp_version, sizeof(carp_version));
  282         SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
  283 
  284         /* generate a key for the arpbalance hash, before the vhid is hashed */
  285         memcpy(&sha1ctx, &sc->sc_sha1, sizeof(sha1ctx));
  286         SHA1Final((unsigned char *)kmd, &sha1ctx);
  287         sc->sc_hashkey[0] = kmd[0] ^ kmd[1];
  288         sc->sc_hashkey[1] = kmd[2] ^ kmd[3];
  289 
  290         /* the rest of the precomputation */
  291         SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
  292 
  293         /* Hash the addresses from smallest to largest, not interface order */
  294 #ifdef INET
  295         cur.s_addr = 0;
  296         do {
  297                 int s;
  298                 found = 0;
  299                 last = cur;
  300                 cur.s_addr = 0xffffffff;
  301                 s = pserialize_read_enter();
  302                 IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
  303                         in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
  304                         if (ifa->ifa_addr->sa_family == AF_INET &&
  305                             ntohl(in.s_addr) > ntohl(last.s_addr) &&
  306                             ntohl(in.s_addr) < ntohl(cur.s_addr)) {
  307                                 cur.s_addr = in.s_addr;
  308                                 found++;
  309                         }
  310                 }
  311                 pserialize_read_exit(s);
  312                 if (found)
  313                         SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur));
  314         } while (found);
  315 #endif /* INET */
  316 
  317 #ifdef INET6
  318         memset(&cur6, 0x00, sizeof(cur6));
  319         do {
  320                 int s;
  321                 found = 0;
  322                 last6 = cur6;
  323                 memset(&cur6, 0xff, sizeof(cur6));
  324                 s = pserialize_read_enter();
  325                 IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
  326                         in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
  327                         if (IN6_IS_ADDR_LINKLOCAL(&in6))
  328                                 in6.s6_addr16[1] = 0;
  329                         if (ifa->ifa_addr->sa_family == AF_INET6 &&
  330                             memcmp(&in6, &last6, sizeof(in6)) > 0 &&
  331                             memcmp(&in6, &cur6, sizeof(in6)) < 0) {
  332                                 cur6 = in6;
  333                                 found++;
  334                         }
  335                 }
  336                 pserialize_read_exit(s);
  337                 if (found)
  338                         SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6));
  339         } while (found);
  340 #endif /* INET6 */
  341 
  342         /* convert ipad to opad */
  343         for (i = 0; i < sizeof(sc->sc_pad); i++)
  344                 sc->sc_pad[i] ^= 0x36 ^ 0x5c;
  345 }
  346 
  347 static void
  348 carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2],
  349     unsigned char md[20])
  350 {
  351         SHA1_CTX sha1ctx;
  352 
  353         /* fetch first half of inner hash */
  354         memcpy(&sha1ctx, &sc->sc_sha1, sizeof(sha1ctx));
  355 
  356         SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
  357         SHA1Final(md, &sha1ctx);
  358 
  359         /* outer hash */
  360         SHA1Init(&sha1ctx);
  361         SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
  362         SHA1Update(&sha1ctx, md, 20);
  363         SHA1Final(md, &sha1ctx);
  364 }
  365 
  366 static int
  367 carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2],
  368     unsigned char md[20])
  369 {
  370         unsigned char md2[20];
  371 
  372         carp_hmac_generate(sc, counter, md2);
  373 
  374         return (memcmp(md, md2, sizeof(md2)));
  375 }
  376 
/*
 * Walk every address configured on sc's interface and apply the
 * routing change selected by cmd (RTM_ADD or RTM_DELETE).
 *
 * AF_INET addresses: any existing host route is deleted; for RTM_ADD a
 * host and/or connected route is then installed depending on what the
 * routing table already holds.  AF_INET6 addresses: in6_ifaddlocal()
 * is called for RTM_ADD and in6_ifremlocal() for any other cmd.
 *
 * The whole walk runs with the kernel lock held and the LWP bound.
 */
static void
carp_setroute(struct carp_softc *sc, int cmd)
{
        struct ifaddr *ifa;
        int s, bound;

        KERNEL_LOCK(1, NULL);
        bound = curlwp_bind();
        s = pserialize_read_enter();
        IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
                struct psref psref;
                /*
                 * Hold a passive reference on ifa and leave the
                 * pserialize read section while the routing calls
                 * below run (presumably because they may block).
                 */
                ifa_acquire(ifa, &psref);
                pserialize_read_exit(s);

                switch (ifa->ifa_addr->sa_family) {
                case AF_INET: {
                        int count = 0;
                        struct rtentry *rt;
                        int hr_otherif, nr_ourif;

                        /*
                         * Avoid screwing with the routes if there are other
                         * carp interfaces which are master and have the same
                         * address.
                         */
                        if (sc->sc_carpdev != NULL &&
                            sc->sc_carpdev->if_carp != NULL) {
                                count = carp_addrcount(
                                    (struct carp_if *)sc->sc_carpdev->if_carp,
                                    ifatoia(ifa), CARP_COUNT_MASTER);
                                if ((cmd == RTM_ADD && count != 1) ||
                                    (cmd == RTM_DELETE && count != 0))
                                        goto next;
                        }

                        /* Remove the existing host route, if any */
                        rtrequest(RTM_DELETE, ifa->ifa_addr,
                            ifa->ifa_addr, ifa->ifa_netmask,
                            RTF_HOST, NULL);

                        /*
                         * hr_otherif: a connected host route for this
                         * address exists on some other interface.
                         */
                        rt = NULL;
                        (void)rtrequest(RTM_GET, ifa->ifa_addr, ifa->ifa_addr,
                            ifa->ifa_netmask, RTF_HOST, &rt);
                        hr_otherif = (rt && rt->rt_ifp != &sc->sc_if &&
                            (rt->rt_flags & RTF_CONNECTED));
                        if (rt != NULL) {
                                rt_unref(rt);
                                rt = NULL;
                        }

                        /* Check for a network route on our interface */

                        /*
                         * rt from this lookup stays referenced until
                         * after the switch on cmd, which still reads it.
                         */
                        rt = NULL;
                        (void)rtrequest(RTM_GET, ifa->ifa_addr, ifa->ifa_addr,
                            ifa->ifa_netmask, 0, &rt);
                        nr_ourif = (rt && rt->rt_ifp == &sc->sc_if);

                        switch (cmd) {
                        case RTM_ADD:
                                if (hr_otherif) {
                                        ifa->ifa_rtrequest = NULL;
                                        ifa->ifa_flags &= ~RTF_CONNECTED;

                                        rtrequest(RTM_ADD, ifa->ifa_addr,
                                            ifa->ifa_addr, ifa->ifa_netmask,
                                            RTF_UP | RTF_HOST, NULL);
                                }
                                if (!hr_otherif || nr_ourif || !rt) {
                                        /*
                                         * Replace a non-connected network
                                         * route on our interface.
                                         */
                                        if (nr_ourif &&
                                            (rt->rt_flags & RTF_CONNECTED) == 0)
                                                rtrequest(RTM_DELETE,
                                                    ifa->ifa_addr,
                                                    ifa->ifa_addr,
                                                    ifa->ifa_netmask, 0, NULL);

                                        ifa->ifa_rtrequest = arp_rtrequest;
                                        ifa->ifa_flags |= RTF_CONNECTED;

                                        /* IFA_ROUTE is set only if the add succeeded */
                                        if (rtrequest(RTM_ADD, ifa->ifa_addr,
                                            ifa->ifa_addr, ifa->ifa_netmask, 0,
                                            NULL) == 0)
                                                ifa->ifa_flags |= IFA_ROUTE;
                                }
                                break;
                        case RTM_DELETE:
                                /* the host route was already deleted above */
                                break;
                        default:
                                break;
                        }
                        if (rt != NULL) {
                                rt_unref(rt);
                                rt = NULL;
                        }
                        break;
                }

#ifdef INET6
                case AF_INET6:
                        if (cmd == RTM_ADD)
                                in6_ifaddlocal(ifa);
                        else
                                in6_ifremlocal(ifa);
                        break;
#endif /* INET6 */
                default:
                        break;
                }
        next:
                /*
                 * Re-enter the read section before dropping the
                 * reference so the iteration can step to the next entry.
                 */
                s = pserialize_read_enter();
                ifa_release(ifa, &psref);
        }
        pserialize_read_exit(s);
        curlwp_bindx(bound);
        KERNEL_UNLOCK_ONE(NULL);
}
  492 
  493 /*
  494  * process input packet.
  495  * we have rearranged checks order compared to the rfc,
  496  * but it seems more efficient this way or not possible otherwise.
  497  */
  498 static void
  499 _carp_proto_input(struct mbuf *m, int hlen, int proto)
  500 {
  501         struct ip *ip = mtod(m, struct ip *);
  502         struct carp_softc *sc = NULL;
  503         struct carp_header *ch;
  504         int iplen, len;
  505         struct ifnet *rcvif;
  506 
  507         CARP_STATINC(CARP_STAT_IPACKETS);
  508         MCLAIM(m, &carp_proto_mowner_rx);
  509 
  510         if (!carp_opts[CARPCTL_ALLOW]) {
  511                 m_freem(m);
  512                 return;
  513         }
  514 
  515         rcvif = m_get_rcvif_NOMPSAFE(m);
  516         /* check if received on a valid carp interface */
  517         if (rcvif->if_type != IFT_CARP) {
  518                 CARP_STATINC(CARP_STAT_BADIF);
  519                 CARP_LOG(sc, ("packet received on non-carp interface: %s",
  520                     rcvif->if_xname));
  521                 m_freem(m);
  522                 return;
  523         }
  524 
  525         /* verify that the IP TTL is 255.  */
  526         if (ip->ip_ttl != CARP_DFLTTL) {
  527                 CARP_STATINC(CARP_STAT_BADTTL);
  528                 CARP_LOG(sc, ("received ttl %d != %d on %s", ip->ip_ttl,
  529                     CARP_DFLTTL, rcvif->if_xname));
  530                 m_freem(m);
  531                 return;
  532         }
  533 
  534         /*
  535          * verify that the received packet length is
  536          * equal to the CARP header
  537          */
  538         iplen = ip->ip_hl << 2;
  539         len = iplen + sizeof(*ch);
  540         if (len > m->m_pkthdr.len) {
  541                 CARP_STATINC(CARP_STAT_BADLEN);
  542                 CARP_LOG(sc, ("packet too short %d on %s", m->m_pkthdr.len,
  543                     rcvif->if_xname));
  544                 m_freem(m);
  545                 return;
  546         }
  547 
  548         if ((m = m_pullup(m, len)) == NULL) {
  549                 CARP_STATINC(CARP_STAT_HDROPS);
  550                 return;
  551         }
  552         ip = mtod(m, struct ip *);
  553         ch = (struct carp_header *)((char *)ip + iplen);
  554         /* verify the CARP checksum */
  555         m->m_data += iplen;
  556         if (carp_cksum(m, len - iplen)) {
  557                 CARP_STATINC(CARP_STAT_BADSUM);
  558                 CARP_LOG(sc, ("checksum failed on %s",
  559                     rcvif->if_xname));
  560                 m_freem(m);
  561                 return;
  562         }
  563         m->m_data -= iplen;
  564 
  565         carp_proto_input_c(m, ch, AF_INET);
  566 }
  567 
  568 void
  569 carp_proto_input(struct mbuf *m, int off, int proto)
  570 {
  571 
  572         wqinput_input(carp_wqinput, m, 0, 0);
  573 }
  574 
  575 #ifdef INET6
  576 static void
  577 _carp6_proto_input(struct mbuf *m, int off, int proto)
  578 {
  579         struct carp_softc *sc = NULL;
  580         struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
  581         struct carp_header *ch;
  582         u_int len;
  583         struct ifnet *rcvif;
  584 
  585         CARP_STATINC(CARP_STAT_IPACKETS6);
  586         MCLAIM(m, &carp_proto6_mowner_rx);
  587 
  588         if (!carp_opts[CARPCTL_ALLOW]) {
  589                 m_freem(m);
  590                 return;
  591         }
  592 
  593         rcvif = m_get_rcvif_NOMPSAFE(m);
  594 
  595         /* check if received on a valid carp interface */
  596         if (rcvif->if_type != IFT_CARP) {
  597                 CARP_STATINC(CARP_STAT_BADIF);
  598                 CARP_LOG(sc, ("packet received on non-carp interface: %s",
  599                     rcvif->if_xname));
  600                 m_freem(m);
  601                 return;
  602         }
  603 
  604         /* verify that the IP TTL is 255 */
  605         if (ip6->ip6_hlim != CARP_DFLTTL) {
  606                 CARP_STATINC(CARP_STAT_BADTTL);
  607                 CARP_LOG(sc, ("received ttl %d != %d on %s", ip6->ip6_hlim,
  608                     CARP_DFLTTL, rcvif->if_xname));
  609                 m_freem(m);
  610                 return;
  611         }
  612 
  613         /* verify that we have a complete carp packet */
  614         len = m->m_len;
  615         M_REGION_GET(ch, struct carp_header *, m, off, sizeof(*ch));
  616         if (ch == NULL) {
  617                 CARP_STATINC(CARP_STAT_BADLEN);
  618                 CARP_LOG(sc, ("packet size %u too small", len));
  619                 return;
  620         }
  621 
  622         /* verify the CARP checksum */
  623         if (carp6_cksum(m, off, sizeof(*ch))) {
  624                 CARP_STATINC(CARP_STAT_BADSUM);
  625                 CARP_LOG(sc, ("checksum failed, on %s", rcvif->if_xname));
  626                 m_freem(m);
  627                 return;
  628         }
  629 
  630         carp_proto_input_c(m, ch, AF_INET6);
  631         return;
  632 }
  633 
  634 int
  635 carp6_proto_input(struct mbuf **mp, int *offp, int proto)
  636 {
  637 
  638         wqinput_input(carp6_wqinput, *mp, *offp, proto);
  639 
  640         return IPPROTO_DONE;
  641 }
  642 #endif /* INET6 */
  643 
  644 static void
  645 carp_proto_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
  646 {
  647         struct carp_softc *sc;
  648         u_int64_t tmp_counter;
  649         struct timeval sc_tv, ch_tv;
  650 
  651         TAILQ_FOREACH(sc, &((struct carp_if *)
  652             m_get_rcvif_NOMPSAFE(m)->if_carpdev->if_carp)->vhif_vrs, sc_list)
  653                 if (sc->sc_vhid == ch->carp_vhid)
  654                         break;
  655 
  656         if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
  657             (IFF_UP|IFF_RUNNING)) {
  658                 CARP_STATINC(CARP_STAT_BADVHID);
  659                 m_freem(m);
  660                 return;
  661         }
  662 
  663         /*
  664          * Check if our own advertisement was duplicated
  665          * from a non simplex interface.
  666          * XXX If there is no address on our physical interface
  667          * there is no way to distinguish our ads from the ones
  668          * another carp host might have sent us.
  669          */
  670         if ((sc->sc_carpdev->if_flags & IFF_SIMPLEX) == 0) {
  671                 struct sockaddr sa;
  672                 struct ifaddr *ifa;
  673                 int s;
  674 
  675                 memset(&sa, 0, sizeof(sa));
  676                 sa.sa_family = af;
  677                 s = pserialize_read_enter();
  678                 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
  679 
  680                 if (ifa && af == AF_INET) {
  681                         struct ip *ip = mtod(m, struct ip *);
  682                         if (ip->ip_src.s_addr ==
  683                                         ifatoia(ifa)->ia_addr.sin_addr.s_addr) {
  684                                 pserialize_read_exit(s);
  685                                 m_freem(m);
  686                                 return;
  687                         }
  688                 }
  689 #ifdef INET6
  690                 if (ifa && af == AF_INET6) {
  691                         struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
  692                         struct in6_addr in6_src, in6_found;
  693 
  694                         in6_src = ip6->ip6_src;
  695                         in6_found = ifatoia6(ifa)->ia_addr.sin6_addr;
  696                         if (IN6_IS_ADDR_LINKLOCAL(&in6_src))
  697                                 in6_src.s6_addr16[1] = 0;
  698                         if (IN6_IS_ADDR_LINKLOCAL(&in6_found))
  699                                 in6_found.s6_addr16[1] = 0;
  700                         if (IN6_ARE_ADDR_EQUAL(&in6_src, &in6_found)) {
  701                                 pserialize_read_exit(s);
  702                                 m_freem(m);
  703                                 return;
  704                         }
  705                 }
  706 #endif /* INET6 */
  707                 pserialize_read_exit(s);
  708         }
  709 
  710         nanotime(&sc->sc_if.if_lastchange);
  711         if_statadd2(&sc->sc_if, if_ipackets, 1, if_ibytes, m->m_pkthdr.len);
  712 
  713         /* verify the CARP version. */
  714         if (ch->carp_version != CARP_VERSION) {
  715                 CARP_STATINC(CARP_STAT_BADVER);
  716                 if_statinc(&sc->sc_if, if_ierrors);
  717                 CARP_LOG(sc, ("invalid version %d != %d",
  718                     ch->carp_version, CARP_VERSION));
  719                 m_freem(m);
  720                 return;
  721         }
  722 
  723         /* verify the hash */
  724         if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
  725                 struct ip *ip;
  726                 char ipbuf[INET_ADDRSTRLEN];
  727 #ifdef INET6
  728                 struct ip6_hdr *ip6;
  729                 char ip6buf[INET6_ADDRSTRLEN];
  730 #endif
  731 
  732                 CARP_STATINC(CARP_STAT_BADAUTH);
  733                 if_statinc(&sc->sc_if, if_ierrors);
  734 
  735                 switch(af) {
  736                 case AF_INET:
  737                         ip = mtod(m, struct ip *);
  738                         CARP_LOG(sc, ("incorrect hash from %s", 
  739                             IN_PRINT(ipbuf, &ip->ip_src)));
  740                         break;
  741 
  742 #ifdef INET6
  743                 case AF_INET6:
  744                         ip6 = mtod(m, struct ip6_hdr *);
  745                         CARP_LOG(sc, ("incorrect hash from %s",
  746                             IN6_PRINT(ip6buf, &ip6->ip6_src)));
  747                         break;
  748 #endif
  749 
  750                 default: CARP_LOG(sc, ("incorrect hash"));
  751                         break;
  752                 }
  753                 m_freem(m);
  754                 return;
  755         }
  756 
  757         tmp_counter = ntohl(ch->carp_counter[0]);
  758         tmp_counter = tmp_counter<<32;
  759         tmp_counter += ntohl(ch->carp_counter[1]);
  760 
  761         /* XXX Replay protection goes here */
  762 
  763         sc->sc_init_counter = 0;
  764         sc->sc_counter = tmp_counter;
  765 
  766 
  767         sc_tv.tv_sec = sc->sc_advbase;
  768         if (carp_suppress_preempt && sc->sc_advskew <  240)
  769                 sc_tv.tv_usec = 240 * 1000000 / 256;
  770         else
  771                 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
  772         ch_tv.tv_sec = ch->carp_advbase;
  773         ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
  774 
  775         switch (sc->sc_state) {
  776         case INIT:
  777                 break;
  778         case MASTER:
  779                 /*
  780                  * If we receive an advertisement from a backup who's going to
  781                  * be more frequent than us, go into BACKUP state.
  782                  */
  783                 if (timercmp(&sc_tv, &ch_tv, >) ||
  784                     timercmp(&sc_tv, &ch_tv, ==)) {
  785                         callout_stop(&sc->sc_ad_tmo);
  786                         CARP_LOG(sc, ("MASTER -> BACKUP (more frequent advertisement received)"));
  787                         carp_set_state(sc, BACKUP);
  788                         carp_setrun(sc, 0);
  789                         carp_setroute(sc, RTM_DELETE);
  790                 }
  791                 break;
  792         case BACKUP:
  793                 /*
  794                  * If we're pre-empting masters who advertise slower than us,
  795                  * and this one claims to be slower, treat him as down.
  796                  */
  797                 if (carp_opts[CARPCTL_PREEMPT] && timercmp(&sc_tv, &ch_tv, <)) {
  798                         CARP_LOG(sc, ("BACKUP -> MASTER (preempting a slower master)"));
  799                         carp_master_down(sc);
  800                         break;
  801                 }
  802 
  803                 /*
  804                  *  If the master is going to advertise at such a low frequency
  805                  *  that he's guaranteed to time out, we'd might as well just
  806                  *  treat him as timed out now.
  807                  */
  808                 sc_tv.tv_sec = sc->sc_advbase * 3;
  809                 if (timercmp(&sc_tv, &ch_tv, <)) {
  810                         CARP_LOG(sc, ("BACKUP -> MASTER (master timed out)"));
  811                         carp_master_down(sc);
  812                         break;
  813                 }
  814 
  815                 /*
  816                  * Otherwise, we reset the counter and wait for the next
  817                  * advertisement.
  818                  */
  819                 carp_setrun(sc, af);
  820                 break;
  821         }
  822 
  823         m_freem(m);
  824         return;
  825 }
  826 
  827 /*
  828  * Interface side of the CARP implementation.
  829  */
  830 
  831 /* ARGSUSED */
  832 void
  833 carpattach(int n)
  834 {
  835         if_clone_attach(&carp_cloner);
  836 
  837         carpstat_percpu = percpu_alloc(sizeof(uint64_t) * CARP_NSTATS);
  838 }
  839 
  840 static int
  841 carp_clone_create(struct if_clone *ifc, int unit)
  842 {
  843         extern int ifqmaxlen;
  844         struct carp_softc *sc;
  845         struct ifnet *ifp;
  846 
  847         sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO);
  848         if (!sc)
  849                 return (ENOMEM);
  850 
  851         sc->sc_suppress = 0;
  852         sc->sc_advbase = CARP_DFLTINTV;
  853         sc->sc_vhid = -1;       /* required setting */
  854         sc->sc_advskew = 0;
  855         sc->sc_init_counter = 1;
  856         sc->sc_naddrs = sc->sc_naddrs6 = 0;
  857 #ifdef INET6
  858         sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
  859 #endif /* INET6 */
  860 
  861         callout_init(&sc->sc_ad_tmo, 0);
  862         callout_init(&sc->sc_md_tmo, 0);
  863         callout_init(&sc->sc_md6_tmo, 0);
  864 
  865         callout_setfunc(&sc->sc_ad_tmo, carp_send_ad, sc);
  866         callout_setfunc(&sc->sc_md_tmo, carp_master_down, sc);
  867         callout_setfunc(&sc->sc_md6_tmo, carp_master_down, sc);
  868 
  869         LIST_INIT(&sc->carp_mc_listhead);
  870         ifp = &sc->sc_if;
  871         ifp->if_softc = sc;
  872         snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name,
  873             unit);
  874         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
  875         ifp->if_ioctl = carp_ioctl;
  876         ifp->if_start = carp_start;
  877         IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
  878         IFQ_SET_READY(&ifp->if_snd);
  879         if_initialize(ifp);
  880         ether_ifattach(ifp, NULL);
  881         /* Overwrite ethernet defaults */
  882         ifp->if_type = IFT_CARP;
  883         ifp->if_output = carp_output;
  884         ifp->if_link_state = LINK_STATE_DOWN;
  885         carp_set_enaddr(sc);
  886         if_register(ifp);
  887 
  888         return (0);
  889 }
  890 
  891 static int
  892 carp_clone_destroy(struct ifnet *ifp)
  893 {
  894         struct carp_softc *sc = ifp->if_softc;
  895 
  896         carpdetach(ifp->if_softc);
  897         ether_ifdetach(ifp);
  898         if_detach(ifp);
  899         callout_destroy(&sc->sc_ad_tmo);
  900         callout_destroy(&sc->sc_md_tmo);
  901         callout_destroy(&sc->sc_md6_tmo);
  902         free(ifp->if_softc, M_DEVBUF);
  903 
  904         return (0);
  905 }
  906 
  907 static void
  908 carpdetach(struct carp_softc *sc)
  909 {
  910         struct ifnet *ifp;
  911         struct carp_if *cif;
  912         int s;
  913 
  914         callout_stop(&sc->sc_ad_tmo);
  915         callout_stop(&sc->sc_md_tmo);
  916         callout_stop(&sc->sc_md6_tmo);
  917 
  918         if (sc->sc_suppress)
  919                 carp_suppress_preempt--;
  920         sc->sc_suppress = 0;
  921 
  922         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
  923                 carp_suppress_preempt--;
  924         sc->sc_sendad_errors = 0;
  925 
  926         carp_set_state(sc, INIT);
  927         sc->sc_if.if_flags &= ~IFF_UP;
  928         carp_setrun(sc, 0);
  929         carp_multicast_cleanup(sc);
  930 
  931         KERNEL_LOCK(1, NULL);
  932         s = splnet();
  933         ifp = sc->sc_carpdev;
  934         if (ifp != NULL) {
  935                 if_linkstate_change_disestablish(ifp,
  936                     sc->sc_linkstate_hook, NULL);
  937 
  938                 cif = (struct carp_if *)ifp->if_carp;
  939                 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
  940                 if (!--cif->vhif_nvrs) {
  941                         ifpromisc(ifp, 0);
  942                         ifp->if_carp = NULL;
  943                         free(cif, M_IFADDR);
  944                 }
  945         }
  946         sc->sc_carpdev = NULL;
  947         splx(s);
  948         KERNEL_UNLOCK_ONE(NULL);
  949 }
  950 
  951 /* Detach an interface from the carp. */
  952 void
  953 carp_ifdetach(struct ifnet *ifp)
  954 {
  955         struct carp_softc *sc, *nextsc;
  956         struct carp_if *cif = (struct carp_if *)ifp->if_carp;
  957 
  958         for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) {
  959                 nextsc = TAILQ_NEXT(sc, sc_list);
  960                 carpdetach(sc);
  961         }
  962 }
  963 
  964 static void
  965 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc,
  966     struct carp_header *ch)
  967 {
  968         if (sc->sc_init_counter) {
  969                 /* this could also be seconds since unix epoch */
  970                 sc->sc_counter = cprng_fast64();
  971         } else
  972                 sc->sc_counter++;
  973 
  974         ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
  975         ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
  976 
  977         carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
  978 }
  979 
  980 static void
  981 carp_send_ad_all(void)
  982 {
  983         struct ifnet *ifp;
  984         struct carp_if *cif;
  985         struct carp_softc *vh;
  986         int s;
  987         int bound = curlwp_bind();
  988 
  989         s = pserialize_read_enter();
  990         IFNET_READER_FOREACH(ifp) {
  991                 struct psref psref;
  992                 if (ifp->if_carp == NULL || ifp->if_type == IFT_CARP)
  993                         continue;
  994 
  995                 if_acquire(ifp, &psref);
  996                 pserialize_read_exit(s);
  997 
  998                 cif = (struct carp_if *)ifp->if_carp;
  999                 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
 1000                         if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
 1001                             (IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER)
 1002                                 carp_send_ad(vh);
 1003                 }
 1004 
 1005                 s = pserialize_read_enter();
 1006                 if_release(ifp, &psref);
 1007         }
 1008         pserialize_read_exit(s);
 1009         curlwp_bindx(bound);
 1010 }
 1011 
 1012 
 1013 static void
 1014 carp_send_ad(void *v)
 1015 {
 1016         struct carp_header ch;
 1017         struct timeval tv;
 1018         struct carp_softc *sc = v;
 1019         struct carp_header *ch_ptr;
 1020         struct mbuf *m;
 1021         int error, len, advbase, advskew, s;
 1022         struct sockaddr sa;
 1023 
 1024         KERNEL_LOCK(1, NULL);
 1025         s = splsoftnet();
 1026 
 1027         advbase = advskew = 0; /* Sssssh compiler */
 1028         if (sc->sc_carpdev == NULL) {
 1029                 if_statinc(&sc->sc_if, if_oerrors);
 1030                 goto retry_later;
 1031         }
 1032 
 1033         /* bow out if we've gone to backup (the carp interface is going down) */
 1034         if (sc->sc_bow_out) {
 1035                 sc->sc_bow_out = 0;
 1036                 advbase = 255;
 1037                 advskew = 255;
 1038         } else {
 1039                 advbase = sc->sc_advbase;
 1040                 if (!carp_suppress_preempt || sc->sc_advskew > 240)
 1041                         advskew = sc->sc_advskew;
 1042                 else
 1043                         advskew = 240;
 1044                 tv.tv_sec = advbase;
 1045                 tv.tv_usec = advskew * 1000000 / 256;
 1046         }
 1047 
 1048         ch.carp_version = CARP_VERSION;
 1049         ch.carp_type = CARP_ADVERTISEMENT;
 1050         ch.carp_vhid = sc->sc_vhid;
 1051         ch.carp_advbase = advbase;
 1052         ch.carp_advskew = advskew;
 1053         ch.carp_authlen = 7;    /* XXX DEFINE */
 1054         ch.carp_pad1 = 0;       /* must be zero */
 1055         ch.carp_cksum = 0;
 1056 
 1057 
 1058 #ifdef INET
 1059         if (sc->sc_naddrs) {
 1060                 struct ip *ip;
 1061                 struct ifaddr *ifa;
 1062                 int _s;
 1063 
 1064                 MGETHDR(m, M_DONTWAIT, MT_HEADER);
 1065                 if (m == NULL) {
 1066                         if_statinc(&sc->sc_if, if_oerrors);
 1067                         CARP_STATINC(CARP_STAT_ONOMEM);
 1068                         /* XXX maybe less ? */
 1069                         goto retry_later;
 1070                 }
 1071                 MCLAIM(m, &carp_proto_mowner_tx);
 1072                 len = sizeof(*ip) + sizeof(ch);
 1073                 m->m_pkthdr.len = len;
 1074                 m_reset_rcvif(m);
 1075                 m->m_len = len;
 1076                 m_align(m, m->m_len);
 1077                 m->m_flags |= M_MCAST;
 1078                 ip = mtod(m, struct ip *);
 1079                 ip->ip_v = IPVERSION;
 1080                 ip->ip_hl = sizeof(*ip) >> 2;
 1081                 ip->ip_tos = IPTOS_LOWDELAY;
 1082                 ip->ip_len = htons(len);
 1083                 ip->ip_id = 0;  /* no need for id, we don't support fragments */
 1084                 ip->ip_off = htons(IP_DF);
 1085                 ip->ip_ttl = CARP_DFLTTL;
 1086                 ip->ip_p = IPPROTO_CARP;
 1087                 ip->ip_sum = 0;
 1088 
 1089                 memset(&sa, 0, sizeof(sa));
 1090                 sa.sa_family = AF_INET;
 1091                 _s = pserialize_read_enter();
 1092                 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
 1093                 if (ifa == NULL)
 1094                         ip->ip_src.s_addr = 0;
 1095                 else
 1096                         ip->ip_src.s_addr =
 1097                             ifatoia(ifa)->ia_addr.sin_addr.s_addr;
 1098                 pserialize_read_exit(_s);
 1099                 ip->ip_dst.s_addr = INADDR_CARP_GROUP;
 1100 
 1101                 ch_ptr = (struct carp_header *)(&ip[1]);
 1102                 memcpy(ch_ptr, &ch, sizeof(ch));
 1103                 carp_prepare_ad(m, sc, ch_ptr);
 1104 
 1105                 m->m_data += sizeof(*ip);
 1106                 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
 1107                 m->m_data -= sizeof(*ip);
 1108 
 1109                 nanotime(&sc->sc_if.if_lastchange);
 1110                 if_statadd2(&sc->sc_if, if_opackets, 1, if_obytes, len);
 1111                 CARP_STATINC(CARP_STAT_OPACKETS);
 1112 
 1113                 error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo,
 1114                     NULL);
 1115                 if (error) {
 1116                         if (error == ENOBUFS)
 1117                                 CARP_STATINC(CARP_STAT_ONOMEM);
 1118                         else
 1119                                 CARP_LOG(sc, ("ip_output failed: %d", error));
 1120                         if_statinc(&sc->sc_if, if_oerrors);
 1121                         if (sc->sc_sendad_errors < INT_MAX)
 1122                                 sc->sc_sendad_errors++;
 1123                         if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
 1124                                 carp_suppress_preempt++;
 1125                                 if (carp_suppress_preempt == 1)
 1126                                         carp_send_ad_all();
 1127                         }
 1128                         sc->sc_sendad_success = 0;
 1129                 } else {
 1130                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
 1131                                 if (++sc->sc_sendad_success >=
 1132                                     CARP_SENDAD_MIN_SUCCESS) {
 1133                                         carp_suppress_preempt--;
 1134                                         sc->sc_sendad_errors = 0;
 1135                                 }
 1136                         } else
 1137                                 sc->sc_sendad_errors = 0;
 1138                 }
 1139         }
 1140 #endif /* INET */
 1141 #ifdef INET6
 1142         if (sc->sc_naddrs6) {
 1143                 struct ip6_hdr *ip6;
 1144                 struct ifaddr *ifa;
 1145                 int _s;
 1146 
 1147                 MGETHDR(m, M_DONTWAIT, MT_HEADER);
 1148                 if (m == NULL) {
 1149                         if_statinc(&sc->sc_if, if_oerrors);
 1150                         CARP_STATINC(CARP_STAT_ONOMEM);
 1151                         /* XXX maybe less ? */
 1152                         goto retry_later;
 1153                 }
 1154                 MCLAIM(m, &carp_proto6_mowner_tx);
 1155                 len = sizeof(*ip6) + sizeof(ch);
 1156                 m->m_pkthdr.len = len;
 1157                 m_reset_rcvif(m);
 1158                 m->m_len = len;
 1159                 m_align(m, m->m_len);
 1160                 m->m_flags |= M_MCAST;
 1161                 ip6 = mtod(m, struct ip6_hdr *);
 1162                 memset(ip6, 0, sizeof(*ip6));
 1163                 ip6->ip6_vfc |= IPV6_VERSION;
 1164                 ip6->ip6_hlim = CARP_DFLTTL;
 1165                 ip6->ip6_nxt = IPPROTO_CARP;
 1166 
 1167                 /* set the source address */
 1168                 memset(&sa, 0, sizeof(sa));
 1169                 sa.sa_family = AF_INET6;
 1170                 _s = pserialize_read_enter();
 1171                 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
 1172                 if (ifa == NULL)        /* This should never happen with IPv6 */
 1173                         memset(&ip6->ip6_src, 0, sizeof(struct in6_addr));
 1174                 else
 1175                         bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr,
 1176                             &ip6->ip6_src, sizeof(struct in6_addr));
 1177                 pserialize_read_exit(_s);
 1178                 /* set the multicast destination */
 1179 
 1180                 ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
 1181                 ip6->ip6_dst.s6_addr8[15] = 0x12;
 1182                 if (in6_setscope(&ip6->ip6_dst, &sc->sc_if, NULL) != 0) {
 1183                         if_statinc(&sc->sc_if, if_oerrors);
 1184                         m_freem(m);
 1185                         CARP_LOG(sc, ("in6_setscope failed"));
 1186                         goto retry_later;
 1187                 }
 1188 
 1189                 ch_ptr = (struct carp_header *)(&ip6[1]);
 1190                 memcpy(ch_ptr, &ch, sizeof(ch));
 1191                 carp_prepare_ad(m, sc, ch_ptr);
 1192 
 1193                 ch_ptr->carp_cksum = carp6_cksum(m, sizeof(*ip6),
 1194                     len - sizeof(*ip6));
 1195 
 1196                 nanotime(&sc->sc_if.if_lastchange);
 1197                 if_statadd2(&sc->sc_if, if_opackets, 1, if_obytes, len);
 1198                 CARP_STATINC(CARP_STAT_OPACKETS6);
 1199 
 1200                 error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL);
 1201                 if (error) {
 1202                         if (error == ENOBUFS)
 1203                                 CARP_STATINC(CARP_STAT_ONOMEM);
 1204                         else
 1205                                 CARP_LOG(sc, ("ip6_output failed: %d", error));
 1206                         if_statinc(&sc->sc_if, if_oerrors);
 1207                         if (sc->sc_sendad_errors < INT_MAX)
 1208                                 sc->sc_sendad_errors++;
 1209                         if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
 1210                                 carp_suppress_preempt++;
 1211                                 if (carp_suppress_preempt == 1)
 1212                                         carp_send_ad_all();
 1213                         }
 1214                         sc->sc_sendad_success = 0;
 1215                 } else {
 1216                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
 1217                                 if (++sc->sc_sendad_success >=
 1218                                     CARP_SENDAD_MIN_SUCCESS) {
 1219                                         carp_suppress_preempt--;
 1220                                         sc->sc_sendad_errors = 0;
 1221                                 }
 1222                         } else
 1223                                 sc->sc_sendad_errors = 0;
 1224                 }
 1225         }
 1226 #endif /* INET6 */
 1227 
 1228 retry_later:
 1229         splx(s);
 1230         KERNEL_UNLOCK_ONE(NULL);
 1231         if (advbase != 255 || advskew != 255)
 1232                 callout_schedule(&sc->sc_ad_tmo, tvtohz(&tv));
 1233 }
 1234 
 1235 /*
 1236  * Broadcast a gratuitous ARP request containing
 1237  * the virtual router MAC address for each IP address
 1238  * associated with the virtual router.
 1239  */
 1240 static void
 1241 carp_send_arp(struct carp_softc *sc)
 1242 {
 1243         struct ifaddr *ifa;
 1244         int s, bound;
 1245 
 1246         KERNEL_LOCK(1, NULL);
 1247         bound = curlwp_bind();
 1248         s = pserialize_read_enter();
 1249         IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
 1250                 struct psref psref;
 1251 
 1252                 if (ifa->ifa_addr->sa_family != AF_INET)
 1253                         continue;
 1254 
 1255                 ifa_acquire(ifa, &psref);
 1256                 pserialize_read_exit(s);
 1257 
 1258                 arpannounce(sc->sc_carpdev, ifa, CLLADDR(sc->sc_if.if_sadl));
 1259 
 1260                 s = pserialize_read_enter();
 1261                 ifa_release(ifa, &psref);
 1262         }
 1263         pserialize_read_exit(s);
 1264         curlwp_bindx(bound);
 1265         KERNEL_UNLOCK_ONE(NULL);
 1266 }
 1267 
 1268 #ifdef INET6
 1269 static void
 1270 carp_send_na(struct carp_softc *sc)
 1271 {
 1272         struct ifaddr *ifa;
 1273         struct in6_addr *in6;
 1274         static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
 1275         int s, bound;
 1276 
 1277         KERNEL_LOCK(1, NULL);
 1278         bound = curlwp_bind();
 1279         s = pserialize_read_enter();
 1280         IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
 1281                 struct psref psref;
 1282 
 1283                 if (ifa->ifa_addr->sa_family != AF_INET6)
 1284                         continue;
 1285 
 1286                 ifa_acquire(ifa, &psref);
 1287                 pserialize_read_exit(s);
 1288 
 1289                 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
 1290                 nd6_na_output(sc->sc_carpdev, &mcast, in6,
 1291                     ND_NA_FLAG_OVERRIDE, 1, NULL);
 1292 
 1293                 s = pserialize_read_enter();
 1294                 ifa_release(ifa, &psref);
 1295         }
 1296         pserialize_read_exit(s);
 1297         curlwp_bindx(bound);
 1298         KERNEL_UNLOCK_ONE(NULL);
 1299 }
 1300 #endif /* INET6 */
 1301 
 1302 /*
 1303  * Based on bridge_hash() in if_bridge.c
 1304  */
 1305 #define mix(a,b,c) \
 1306         do {                                            \
 1307                 a -= b; a -= c; a ^= (c >> 13);         \
 1308                 b -= c; b -= a; b ^= (a << 8);          \
 1309                 c -= a; c -= b; c ^= (b >> 13);         \
 1310                 a -= b; a -= c; a ^= (c >> 12);         \
 1311                 b -= c; b -= a; b ^= (a << 16);         \
 1312                 c -= a; c -= b; c ^= (b >> 5);          \
 1313                 a -= b; a -= c; a ^= (c >> 3);          \
 1314                 b -= c; b -= a; b ^= (a << 10);         \
 1315                 c -= a; c -= b; c ^= (b >> 15);         \
 1316         } while (0)
 1317 
 1318 static u_int32_t
 1319 carp_hash(struct carp_softc *sc, u_char *src)
 1320 {
 1321         u_int32_t a = 0x9e3779b9, b = sc->sc_hashkey[0], c = sc->sc_hashkey[1];
 1322 
 1323         c += sc->sc_key[3] << 24;
 1324         c += sc->sc_key[2] << 16;
 1325         c += sc->sc_key[1] << 8;
 1326         c += sc->sc_key[0];
 1327         b += src[5] << 8;
 1328         b += src[4];
 1329         a += src[3] << 24;
 1330         a += src[2] << 16;
 1331         a += src[1] << 8;
 1332         a += src[0];
 1333 
 1334         mix(a, b, c);
 1335         return (c);
 1336 }
 1337 
 1338 static int
 1339 carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type)
 1340 {
 1341         struct carp_softc *vh;
 1342         struct ifaddr *ifa;
 1343         int count = 0;
 1344 
 1345         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
 1346                 if ((type == CARP_COUNT_RUNNING &&
 1347                     (vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
 1348                     (IFF_UP|IFF_RUNNING)) ||
 1349                     (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) {
 1350                         int s = pserialize_read_enter();
 1351                         IFADDR_READER_FOREACH(ifa, &vh->sc_if) {
 1352                                 if (ifa->ifa_addr->sa_family == AF_INET &&
 1353                                     ia->ia_addr.sin_addr.s_addr ==
 1354                                     ifatoia(ifa)->ia_addr.sin_addr.s_addr)
 1355                                         count++;
 1356                         }
 1357                         pserialize_read_exit(s);
 1358                 }
 1359         }
 1360         return (count);
 1361 }
 1362 
 1363 int
 1364 carp_iamatch(struct in_ifaddr *ia, u_char *src,
 1365     u_int32_t *count, u_int32_t index)
 1366 {
 1367         struct carp_softc *sc = ia->ia_ifp->if_softc;
 1368 
 1369         if (carp_opts[CARPCTL_ARPBALANCE]) {
 1370                 /*
 1371                  * We use the source ip to decide which virtual host should
 1372                  * handle the request. If we're master of that virtual host,
 1373                  * then we respond, otherwise, just drop the arp packet on
 1374                  * the floor.
 1375                  */
 1376 
 1377                 /* Count the elegible carp interfaces with this address */
 1378                 if (*count == 0)
 1379                         *count = carp_addrcount(
 1380                             (struct carp_if *)ia->ia_ifp->if_carpdev->if_carp,
 1381                             ia, CARP_COUNT_RUNNING);
 1382 
 1383                 /* This should never happen, but... */
 1384                 if (*count == 0)
 1385                         return (0);
 1386 
 1387                 if (carp_hash(sc, src) % *count == index - 1 &&
 1388                     sc->sc_state == MASTER) {
 1389                         return (1);
 1390                 }
 1391         } else {
 1392                 if (sc->sc_state == MASTER)
 1393                         return (1);
 1394         }
 1395 
 1396         return (0);
 1397 }
 1398 
 1399 #ifdef INET6
 1400 struct ifaddr *
 1401 carp_iamatch6(void *v, struct in6_addr *taddr)
 1402 {
 1403         struct carp_if *cif = v;
 1404         struct carp_softc *vh;
 1405         struct ifaddr *ifa;
 1406 
 1407         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
 1408                 int s = pserialize_read_enter();
 1409                 IFADDR_READER_FOREACH(ifa, &vh->sc_if) {
 1410                         if (IN6_ARE_ADDR_EQUAL(taddr,
 1411                             &ifatoia6(ifa)->ia_addr.sin6_addr) &&
 1412                             ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
 1413                             (IFF_UP|IFF_RUNNING)) && vh->sc_state == MASTER)
 1414                                 return (ifa);
 1415                 }
 1416                 pserialize_read_exit(s);
 1417         }
 1418 
 1419         return (NULL);
 1420 }
 1421 #endif /* INET6 */
 1422 
 1423 struct ifnet *
 1424 carp_ourether(void *v, struct ether_header *eh, u_char iftype, int src)
 1425 {
 1426         struct carp_if *cif = (struct carp_if *)v;
 1427         struct carp_softc *vh;
 1428         u_int8_t *ena;
 1429 
 1430         if (src)
 1431                 ena = (u_int8_t *)&eh->ether_shost;
 1432         else
 1433                 ena = (u_int8_t *)&eh->ether_dhost;
 1434 
 1435         switch (iftype) {
 1436         case IFT_ETHER:
 1437         case IFT_FDDI:
 1438                 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
 1439                         return (NULL);
 1440                 break;
 1441         case IFT_ISO88025:
 1442                 if (ena[0] != 3 || ena[1] || ena[4] || ena[5])
 1443                         return (NULL);
 1444                 break;
 1445         default:
 1446                 return (NULL);
 1447                 break;
 1448         }
 1449 
 1450         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list)
 1451                 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
 1452                     (IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER &&
 1453                     !memcmp(ena, CLLADDR(vh->sc_if.if_sadl),
 1454                     ETHER_ADDR_LEN)) {
 1455                         return (&vh->sc_if);
 1456                     }
 1457 
 1458         return (NULL);
 1459 }
 1460 
 1461 int
 1462 carp_input(struct mbuf *m, u_int8_t *shost, u_int8_t *dhost, u_int16_t etype)
 1463 {
 1464         struct ether_header eh;
 1465         struct carp_if *cif = (struct carp_if *)m_get_rcvif_NOMPSAFE(m)->if_carp;
 1466         struct ifnet *ifp;
 1467 
 1468         memcpy(&eh.ether_shost, shost, sizeof(eh.ether_shost));
 1469         memcpy(&eh.ether_dhost, dhost, sizeof(eh.ether_dhost));
 1470         eh.ether_type = etype;
 1471 
 1472         if (m->m_flags & (M_BCAST|M_MCAST)) {
 1473                 struct carp_softc *vh;
 1474                 struct mbuf *m0;
 1475 
 1476                 /*
 1477                  * XXX Should really check the list of multicast addresses
 1478                  * for each CARP interface _before_ copying.
 1479                  */
 1480                 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
 1481                         m0 = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
 1482                         if (m0 == NULL)
 1483                                 continue;
 1484                         m_set_rcvif(m0, &vh->sc_if);
 1485                         ether_input(&vh->sc_if, m0);
 1486                 }
 1487                 return (1);
 1488         }
 1489 
 1490         ifp = carp_ourether(cif, &eh, m_get_rcvif_NOMPSAFE(m)->if_type, 0);
 1491         if (ifp == NULL) {
 1492                 return (1);
 1493         }
 1494 
 1495         m_set_rcvif(m, ifp);
 1496 
 1497         bpf_mtap(ifp, m, BPF_D_IN);
 1498         if_statinc(ifp, if_ipackets);
 1499         ether_input(ifp, m);
 1500         return (0);
 1501 }
 1502 
 1503 static void
 1504 carp_master_down(void *v)
 1505 {
 1506         struct carp_softc *sc = v;
 1507 
 1508         switch (sc->sc_state) {
 1509         case INIT:
 1510                 printf("%s: master_down event in INIT state\n",
 1511                     sc->sc_if.if_xname);
 1512                 break;
 1513         case MASTER:
 1514                 break;
 1515         case BACKUP:
 1516                 CARP_LOG(sc, ("INIT -> MASTER (preempting)"));
 1517                 carp_set_state(sc, MASTER);
 1518                 carp_send_ad(sc);
 1519                 carp_send_arp(sc);
 1520 #ifdef INET6
 1521                 carp_send_na(sc);
 1522 #endif /* INET6 */
 1523                 carp_setrun(sc, 0);
 1524                 carp_setroute(sc, RTM_ADD);
 1525                 break;
 1526         }
 1527 }
 1528 
 1529 /*
 1530  * When in backup state, af indicates whether to reset the master down timer
 1531  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
 1532  */
 1533 static void
 1534 carp_setrun(struct carp_softc *sc, sa_family_t af)
 1535 {
 1536         struct timeval tv;
 1537 
 1538         if (sc->sc_carpdev == NULL) {
 1539                 sc->sc_if.if_flags &= ~IFF_RUNNING;
 1540                 carp_set_state(sc, INIT);
 1541                 return;
 1542         }
 1543 
 1544         if (sc->sc_if.if_flags & IFF_UP && sc->sc_vhid > 0 &&
 1545             (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) {
 1546                 sc->sc_if.if_flags |= IFF_RUNNING;
 1547         } else {
 1548                 sc->sc_if.if_flags &= ~IFF_RUNNING;
 1549                 carp_setroute(sc, RTM_DELETE);
 1550                 return;
 1551         }
 1552 
 1553         switch (sc->sc_state) {
 1554         case INIT:
 1555                 carp_set_state(sc, BACKUP);
 1556                 carp_setroute(sc, RTM_DELETE);
 1557                 carp_setrun(sc, 0);
 1558                 break;
 1559         case BACKUP:
 1560                 callout_stop(&sc->sc_ad_tmo);
 1561                 tv.tv_sec = 3 * sc->sc_advbase;
 1562                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
 1563                 switch (af) {
 1564 #ifdef INET
 1565                 case AF_INET:
 1566                         callout_schedule(&sc->sc_md_tmo, tvtohz(&tv));
 1567                         break;
 1568 #endif /* INET */
 1569 #ifdef INET6
 1570                 case AF_INET6:
 1571                         callout_schedule(&sc->sc_md6_tmo, tvtohz(&tv));
 1572                         break;
 1573 #endif /* INET6 */
 1574                 default:
 1575                         if (sc->sc_naddrs)
 1576                                 callout_schedule(&sc->sc_md_tmo, tvtohz(&tv));
 1577 #ifdef INET6
 1578                         if (sc->sc_naddrs6)
 1579                                 callout_schedule(&sc->sc_md6_tmo, tvtohz(&tv));
 1580 #endif /* INET6 */
 1581                         break;
 1582                 }
 1583                 break;
 1584         case MASTER:
 1585                 tv.tv_sec = sc->sc_advbase;
 1586                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
 1587                 callout_schedule(&sc->sc_ad_tmo, tvtohz(&tv));
 1588                 break;
 1589         }
 1590 }
 1591 
 1592 static void
 1593 carp_multicast_cleanup(struct carp_softc *sc)
 1594 {
 1595         struct ip_moptions *imo = &sc->sc_imo;
 1596 #ifdef INET6
 1597         struct ip6_moptions *im6o = &sc->sc_im6o;
 1598 #endif
 1599         u_int16_t n = imo->imo_num_memberships;
 1600 
 1601         /* Clean up our own multicast memberships */
 1602         while (n-- > 0) {
 1603                 if (imo->imo_membership[n] != NULL) {
 1604                         in_delmulti(imo->imo_membership[n]);
 1605                         imo->imo_membership[n] = NULL;
 1606                 }
 1607         }
 1608         imo->imo_num_memberships = 0;
 1609         imo->imo_multicast_if_index = 0;
 1610 
 1611 #ifdef INET6
 1612         while (!LIST_EMPTY(&im6o->im6o_memberships)) {
 1613                 struct in6_multi_mship *imm =
 1614                     LIST_FIRST(&im6o->im6o_memberships);
 1615 
 1616                 LIST_REMOVE(imm, i6mm_chain);
 1617                 in6_leavegroup(imm);
 1618         }
 1619         im6o->im6o_multicast_if_index = 0;
 1620 #endif
 1621 
 1622         /* And any other multicast memberships */
 1623         carp_ether_purgemulti(sc);
 1624 }
 1625 
/*
 * Attach the carp interface sc to the parent interface ifp, or detach it
 * from its current parent when ifp is NULL.
 *
 * Returns 0 on success (including when ifp already is the parent),
 * EADDRNOTAVAIL when ifp is not multicast capable, EINVAL when ifp is
 * itself a carp interface or already carries another carp interface with
 * the same vhid, or the error reported by ifpromisc() or the multicast
 * join helpers.
 */
static int
carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp)
{
        struct carp_if *cif, *ncif = NULL;
        struct carp_softc *vr, *after = NULL;
        int myself = 0, error = 0;
        int s;

        /* Nothing to do when the parent does not change. */
        if (ifp == sc->sc_carpdev)
                return (0);

        if (ifp != NULL) {
                if ((ifp->if_flags & IFF_MULTICAST) == 0)
                        return (EADDRNOTAVAIL);

                if (ifp->if_type == IFT_CARP)
                        return (EINVAL);

                if (ifp->if_carp == NULL) {
                        /*
                         * No carp state on this parent yet: allocate it and
                         * switch the parent to promiscuous mode.  ncif is
                         * only published in ifp->if_carp further below.
                         */
                        ncif = malloc(sizeof(*cif), M_IFADDR, M_WAITOK);
                        if ((error = ifpromisc(ifp, 1))) {
                                free(ncif, M_IFADDR);
                                return (error);
                        }

                        ncif->vhif_ifp = ifp;
                        TAILQ_INIT(&ncif->vhif_vrs);
                } else {
                        /* Refuse a vhid that is already in use on ifp. */
                        cif = (struct carp_if *)ifp->if_carp;
                        TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
                                if (vr != sc && vr->sc_vhid == sc->sc_vhid)
                                        return (EINVAL);
                }

                /* detach from old interface */
                if (sc->sc_carpdev != NULL)
                        carpdetach(sc);

                /*
                 * join multicast groups
                 *
                 * NOTE(review): the guards test the address counts for
                 * "< 0"; if the counts are never negative these joins are
                 * unreachable -- confirm whether "> 0" was intended.  The
                 * error paths free ncif but do not undo ifpromisc().
                 */
                if (sc->sc_naddrs < 0 &&
                    (error = carp_join_multicast(sc)) != 0) {
                        if (ncif != NULL)
                                free(ncif, M_IFADDR);
                        return (error);
                }

#ifdef INET6
                if (sc->sc_naddrs6 < 0 &&
                    (error = carp_join_multicast6(sc)) != 0) {
                        if (ncif != NULL)
                                free(ncif, M_IFADDR);
                        carp_multicast_cleanup(sc);
                        return (error);
                }
#endif

                /* attach carp interface to physical interface */
                if (ncif != NULL)
                        ifp->if_carp = (void *)ncif;
                sc->sc_carpdev = ifp;
                /* Expose only the parent's TSO and checksum capabilities. */
                sc->sc_if.if_capabilities = ifp->if_capabilities &
                             (IFCAP_TSOv4 | IFCAP_TSOv6 |
                             IFCAP_CSUM_IPv4_Tx|IFCAP_CSUM_IPv4_Rx|
                             IFCAP_CSUM_TCPv4_Tx|IFCAP_CSUM_TCPv4_Rx|
                             IFCAP_CSUM_UDPv4_Tx|IFCAP_CSUM_UDPv4_Rx|
                             IFCAP_CSUM_TCPv6_Tx|IFCAP_CSUM_TCPv6_Rx|
                             IFCAP_CSUM_UDPv6_Tx|IFCAP_CSUM_UDPv6_Rx);

                /*
                 * Find out whether sc is already on the parent's list and
                 * remember the last entry with a smaller vhid.
                 */
                cif = (struct carp_if *)ifp->if_carp;
                TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
                        if (vr == sc)
                                myself = 1;
                        if (vr->sc_vhid < sc->sc_vhid)
                                after = vr;
                }

                if (!myself) {
                        /*
                         * We're trying to keep things in order
                         *
                         * NOTE(review): when no entry has a smaller vhid
                         * the new entry goes to the tail, not the head --
                         * verify that this is intended.
                         */
                        if (after == NULL) {
                                TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
                        } else {
                                TAILQ_INSERT_AFTER(&cif->vhif_vrs, after,
                                    sc, sc_list);
                        }
                        cif->vhif_nvrs++;
                }
                if (sc->sc_naddrs || sc->sc_naddrs6)
                        sc->sc_if.if_flags |= IFF_UP;
                carp_set_enaddr(sc);
                /* Track the parent's link state from now on. */
                sc->sc_linkstate_hook = if_linkstate_change_establish(ifp,
                    carp_carpdev_state, (void *)ifp);
                /* Evaluate the parent's current state once right away. */
                KERNEL_LOCK(1, NULL);
                s = splnet();
                carp_carpdev_state(ifp);
                splx(s);
                KERNEL_UNLOCK_ONE(NULL);
        } else {
                /* No parent requested: detach and mark the interface down. */
                carpdetach(sc);
                sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING);
        }
        return (0);
}
 1728 
 1729 static void
 1730 carp_set_enaddr(struct carp_softc *sc)
 1731 {
 1732         struct ifnet *ifp = &sc->sc_if;
 1733         uint8_t enaddr[ETHER_ADDR_LEN];
 1734 
 1735         if (sc->sc_vhid == -1) {
 1736                 ifp->if_addrlen = 0;
 1737                 if_alloc_sadl(ifp);
 1738                 return;
 1739         }
 1740 
 1741         if (sc->sc_carpdev && sc->sc_carpdev->if_type == IFT_ISO88025) {
 1742                 enaddr[0] = 3;
 1743                 enaddr[1] = 0;
 1744                 enaddr[2] = 0x40 >> (sc->sc_vhid - 1);
 1745                 enaddr[3] = 0x40000 >> (sc->sc_vhid - 1);
 1746                 enaddr[4] = 0;
 1747                 enaddr[5] = 0;
 1748         } else {
 1749                 enaddr[0] = 0;
 1750                 enaddr[1] = 0;
 1751                 enaddr[2] = 0x5e;
 1752                 enaddr[3] = 0;
 1753                 enaddr[4] = 1;
 1754                 enaddr[5] = sc->sc_vhid;
 1755         }
 1756 
 1757         if_set_sadl(ifp, enaddr, sizeof(enaddr), false);
 1758 }
 1759 
#if 0
/*
 * Disabled code (compiled out by the surrounding "#if 0").
 *
 * Recounts the IPv4 and IPv6 addresses configured on the carp interface.
 * When either count dropped, the stored counts are updated, the IPv4
 * CARP group membership is re-established if it is gone, and the
 * interface is either brought down (no addresses left) or its HMAC is
 * prepared again.  carp_setrun() is called in every case.
 *
 * v is the carp_softc of the interface.
 */
static void
carp_addr_updated(void *v)
{
        struct carp_softc *sc = (struct carp_softc *) v;
        struct ifaddr *ifa;
        int new_naddrs = 0, new_naddrs6 = 0;

        /* Count the addresses currently configured, per family. */
        IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
                if (ifa->ifa_addr->sa_family == AF_INET)
                        new_naddrs++;
                else if (ifa->ifa_addr->sa_family == AF_INET6)
                        new_naddrs6++;
        }

        /* Handle a callback after SIOCDIFADDR */
        if (new_naddrs < sc->sc_naddrs || new_naddrs6 < sc->sc_naddrs6) {
                struct in_addr mc_addr;

                sc->sc_naddrs = new_naddrs;
                sc->sc_naddrs6 = new_naddrs6;

                /* Re-establish multicast membership removed by in_control */
                mc_addr.s_addr = INADDR_CARP_GROUP;
                if (!in_multi_group(mc_addr, &sc->sc_if, 0)) {
                        memset(&sc->sc_imo, 0, sizeof(sc->sc_imo));

                        if (sc->sc_carpdev != NULL && sc->sc_naddrs > 0)
                                carp_join_multicast(sc);
                }

                /* Without any address the interface cannot stay up. */
                if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) {
                        sc->sc_if.if_flags &= ~IFF_UP;
                        carp_set_state(sc, INIT);
                } else
                        carp_hmac_prepare(sc);
        }

        carp_setrun(sc, 0);
}
#endif
 1801 
 1802 static int
 1803 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
 1804 {
 1805         struct ifnet *ifp = sc->sc_carpdev;
 1806         struct in_ifaddr *ia, *ia_if;
 1807         int error = 0;
 1808         int s;
 1809 
 1810         if (sin->sin_addr.s_addr == 0) {
 1811                 if (!(sc->sc_if.if_flags & IFF_UP))
 1812                         carp_set_state(sc, INIT);
 1813                 if (sc->sc_naddrs)
 1814                         sc->sc_if.if_flags |= IFF_UP;
 1815                 carp_setrun(sc, 0);
 1816                 return (0);
 1817         }
 1818 
 1819         /* we have to do this by hand to ensure we don't match on ourselves */
 1820         ia_if = NULL;
 1821         s = pserialize_read_enter();
 1822         IN_ADDRLIST_READER_FOREACH(ia) {
 1823                 /* and, yeah, we need a multicast-capable iface too */
 1824                 if (ia->ia_ifp != &sc->sc_if &&
 1825                     ia->ia_ifp->if_type != IFT_CARP &&
 1826                     (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
 1827                     (sin->sin_addr.s_addr & ia->ia_subnetmask) ==
 1828                     ia->ia_subnet) {
 1829                         if (!ia_if)
 1830                                 ia_if = ia;
 1831                 }
 1832         }
 1833 
 1834         if (ia_if) {
 1835                 ia = ia_if;
 1836                 if (ifp) {
 1837                         if (ifp != ia->ia_ifp)
 1838                                 return (EADDRNOTAVAIL);
 1839                 } else {
 1840                         /* FIXME NOMPSAFE */
 1841                         ifp = ia->ia_ifp;
 1842                 }
 1843         }
 1844         pserialize_read_exit(s);
 1845 
 1846         if ((error = carp_set_ifp(sc, ifp)))
 1847                 return (error);
 1848 
 1849         if (sc->sc_carpdev == NULL)
 1850                 return (EADDRNOTAVAIL);
 1851 
 1852         if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0)
 1853                 return (error);
 1854 
 1855         sc->sc_naddrs++;
 1856         if (sc->sc_carpdev != NULL)
 1857                 sc->sc_if.if_flags |= IFF_UP;
 1858 
 1859         carp_set_state(sc, INIT);
 1860         carp_setrun(sc, 0);
 1861 
 1862         /*
 1863          * Hook if_addrhooks so that we get a callback after in_ifinit has run,
 1864          * to correct any inappropriate routes that it inserted.
 1865          */
 1866         if (sc->ah_cookie == 0) {
 1867                 /* XXX link address hook */
 1868         }
 1869 
 1870         return (0);
 1871 }
 1872 
 1873 static int
 1874 carp_join_multicast(struct carp_softc *sc)
 1875 {
 1876         struct ip_moptions *imo = &sc->sc_imo, tmpimo;
 1877         struct in_addr addr;
 1878 
 1879         memset(&tmpimo, 0, sizeof(tmpimo));
 1880         addr.s_addr = INADDR_CARP_GROUP;
 1881         if ((tmpimo.imo_membership[0] =
 1882             in_addmulti(&addr, &sc->sc_if)) == NULL) {
 1883                 return (ENOBUFS);
 1884         }
 1885 
 1886         imo->imo_membership[0] = tmpimo.imo_membership[0];
 1887         imo->imo_num_memberships = 1;
 1888         imo->imo_multicast_if_index = sc->sc_if.if_index;
 1889         imo->imo_multicast_ttl = CARP_DFLTTL;
 1890         imo->imo_multicast_loop = 0;
 1891         return (0);
 1892 }
 1893 
 1894 
 1895 #ifdef INET6
 1896 static int
 1897 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
 1898 {
 1899         struct ifnet *ifp = sc->sc_carpdev;
 1900         struct in6_ifaddr *ia, *ia_if;
 1901         int error = 0;
 1902         int s;
 1903 
 1904         if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 1905                 if (!(sc->sc_if.if_flags & IFF_UP))
 1906                         carp_set_state(sc, INIT);
 1907                 if (sc->sc_naddrs6)
 1908                         sc->sc_if.if_flags |= IFF_UP;
 1909                 carp_setrun(sc, 0);
 1910                 return (0);
 1911         }
 1912 
 1913         /* we have to do this by hand to ensure we don't match on ourselves */
 1914         ia_if = NULL;
 1915         s = pserialize_read_enter();
 1916         IN6_ADDRLIST_READER_FOREACH(ia) {
 1917                 int i;
 1918 
 1919                 for (i = 0; i < 4; i++) {
 1920                         if ((sin6->sin6_addr.s6_addr32[i] &
 1921                             ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
 1922                             (ia->ia_addr.sin6_addr.s6_addr32[i] &
 1923                             ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
 1924                                 break;
 1925                 }
 1926                 /* and, yeah, we need a multicast-capable iface too */
 1927                 if (ia->ia_ifp != &sc->sc_if &&
 1928                     ia->ia_ifp->if_type != IFT_CARP &&
 1929                     (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
 1930                     (i == 4)) {
 1931                         if (!ia_if)
 1932                                 ia_if = ia;
 1933                 }
 1934         }
 1935         pserialize_read_exit(s);
 1936 
 1937         if (ia_if) {
 1938                 ia = ia_if;
 1939                 if (sc->sc_carpdev) {
 1940                         if (sc->sc_carpdev != ia->ia_ifp)
 1941                                 return (EADDRNOTAVAIL);
 1942                 } else {
 1943                         ifp = ia->ia_ifp;
 1944                 }
 1945         }
 1946 
 1947         if ((error = carp_set_ifp(sc, ifp)))
 1948                 return (error);
 1949 
 1950         if (sc->sc_carpdev == NULL)
 1951                 return (EADDRNOTAVAIL);
 1952 
 1953         if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0)
 1954                 return (error);
 1955 
 1956         sc->sc_naddrs6++;
 1957         if (sc->sc_carpdev != NULL)
 1958                 sc->sc_if.if_flags |= IFF_UP;
 1959         carp_set_state(sc, INIT);
 1960         carp_setrun(sc, 0);
 1961 
 1962         return (0);
 1963 }
 1964 
 1965 static int
 1966 carp_join_multicast6(struct carp_softc *sc)
 1967 {
 1968         struct in6_multi_mship *imm, *imm2;
 1969         struct ip6_moptions *im6o = &sc->sc_im6o;
 1970         struct sockaddr_in6 addr6;
 1971         int error;
 1972 
 1973         /* Join IPv6 CARP multicast group */
 1974         memset(&addr6, 0, sizeof(addr6));
 1975         addr6.sin6_family = AF_INET6;
 1976         addr6.sin6_len = sizeof(addr6);
 1977         addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
 1978         addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
 1979         addr6.sin6_addr.s6_addr8[15] = 0x12;
 1980         if ((imm = in6_joingroup(&sc->sc_if,
 1981             &addr6.sin6_addr, &error, 0)) == NULL) {
 1982                 return (error);
 1983         }
 1984         /* join solicited multicast address */
 1985         memset(&addr6.sin6_addr, 0, sizeof(addr6.sin6_addr));
 1986         addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
 1987         addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
 1988         addr6.sin6_addr.s6_addr32[1] = 0;
 1989         addr6.sin6_addr.s6_addr32[2] = htonl(1);
 1990         addr6.sin6_addr.s6_addr32[3] = 0;
 1991         addr6.sin6_addr.s6_addr8[12] = 0xff;
 1992         if ((imm2 = in6_joingroup(&sc->sc_if,
 1993             &addr6.sin6_addr, &error, 0)) == NULL) {
 1994                 in6_leavegroup(imm);
 1995                 return (error);
 1996         }
 1997 
 1998         /* apply v6 multicast membership */
 1999         im6o->im6o_multicast_if_index = sc->sc_if.if_index;
 2000         if (imm)
 2001                 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm,
 2002                     i6mm_chain);
 2003         if (imm2)
 2004                 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2,
 2005                     i6mm_chain);
 2006 
 2007         return (0);
 2008 }
 2009 
 2010 #endif /* INET6 */
 2011 
/*
 * ioctl handler of the carp interface.
 *
 * Handles address initialisation (SIOCINITIFADDR), interface flag changes
 * (SIOCSIFFLAGS), setting and reading the carp configuration (SIOCSVH,
 * SIOCGVH), link-level multicast changes (SIOCADDMULTI, SIOCDELMULTI) and
 * capability changes (SIOCSIFCAP); everything else is passed on to
 * ether_ioctl().
 *
 * Returns 0 or an errno value.  carp_hmac_prepare() runs before the
 * final return; the early "return" statements inside SIOCSVH skip it.
 */
static int
carp_ioctl(struct ifnet *ifp, u_long cmd, void *data)
{
        struct lwp *l = curlwp;         /* XXX */
        struct carp_softc *sc = ifp->if_softc, *vr;
        struct carpreq carpr;
        struct ifaddr *ifa;
        struct ifreq *ifr;
        struct ifnet *cdev = NULL;
        int error = 0;

        /* data is viewed as an ifaddr or an ifreq, depending on cmd. */
        ifa = (struct ifaddr *)data;
        ifr = (struct ifreq *)data;

        switch (cmd) {
        case SIOCINITIFADDR:
                /* A new address: mark the interface up and configure it. */
                switch (ifa->ifa_addr->sa_family) {
#ifdef INET
                case AF_INET:
                        sc->sc_if.if_flags |= IFF_UP;
                        memcpy(ifa->ifa_dstaddr, ifa->ifa_addr,
                            sizeof(struct sockaddr));
                        error = carp_set_addr(sc, satosin(ifa->ifa_addr));
                        break;
#endif /* INET */
#ifdef INET6
                case AF_INET6:
                        sc->sc_if.if_flags|= IFF_UP;
                        error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
                        break;
#endif /* INET6 */
                default:
                        error = EAFNOSUPPORT;
                        break;
                }
                break;

        case SIOCSIFFLAGS:
                if ((error = ifioctl_common(ifp, cmd, data)) != 0)
                        break;
                if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) {
                        /* Going down: stop all timers first. */
                        callout_stop(&sc->sc_ad_tmo);
                        callout_stop(&sc->sc_md_tmo);
                        callout_stop(&sc->sc_md6_tmo);
                        if (sc->sc_state == MASTER) {
                                /* we need the interface up to bow out */
                                sc->sc_if.if_flags |= IFF_UP;
                                sc->sc_bow_out = 1;
                                carp_send_ad(sc);
                        }
                        sc->sc_if.if_flags &= ~IFF_UP;
                        carp_set_state(sc, INIT);
                        carp_setrun(sc, 0);
                } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) {
                        /* Coming up from INIT. */
                        sc->sc_if.if_flags |= IFF_UP;
                        carp_setrun(sc, 0);
                }
                carp_update_link_state(sc);
                break;

        case SIOCSVH:
                /* Set the carp configuration; requires authorization. */
                if (l == NULL)
                        break;
                if ((error = kauth_authorize_network(l->l_cred,
                    KAUTH_NETWORK_INTERFACE,
                    KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
                    NULL)) != 0)
                        break;
                if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
                        break;
                /*
                 * NOTE(review): this value is overwritten by the result of
                 * carp_set_ifp() below before it is ever read, so the
                 * "error > 0" test at the end of this case can no longer
                 * become true through it -- confirm the intent.
                 */
                error = 1;
                /* An empty device name leaves cdev NULL (detach). */
                if (carpr.carpr_carpdev[0] != '\0' &&
                    (cdev = ifunit(carpr.carpr_carpdev)) == NULL)
                        return (EINVAL);
                if ((error = carp_set_ifp(sc, cdev)))
                        return (error);
                /* Honour a requested state change unless we are in INIT. */
                if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) {
                        switch (carpr.carpr_state) {
                        case BACKUP:
                                callout_stop(&sc->sc_ad_tmo);
                                carp_set_state(sc, BACKUP);
                                carp_setrun(sc, 0);
                                carp_setroute(sc, RTM_DELETE);
                                break;
                        case MASTER:
                                carp_master_down(sc);
                                break;
                        default:
                                break;
                        }
                }
                if (carpr.carpr_vhid > 0) {
                        if (carpr.carpr_vhid > 255) {
                                error = EINVAL;
                                break;
                        }
                        /* The vhid has to be unique on the parent. */
                        if (sc->sc_carpdev) {
                                struct carp_if *cif;
                                cif = (struct carp_if *)sc->sc_carpdev->if_carp;
                                TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
                                        if (vr != sc &&
                                            vr->sc_vhid == carpr.carpr_vhid)
                                                return (EINVAL);
                        }
                        sc->sc_vhid = carpr.carpr_vhid;
                        carp_set_enaddr(sc);
                        carp_set_state(sc, INIT);
                        error--;
                }
                if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) {
                        if (carpr.carpr_advskew > 254) {
                                error = EINVAL;
                                break;
                        }
                        if (carpr.carpr_advbase > 255) {
                                error = EINVAL;
                                break;
                        }
                        sc->sc_advbase = carpr.carpr_advbase;
                        sc->sc_advskew = carpr.carpr_advskew;
                        error--;
                }
                /* The key is always taken from the request. */
                memcpy(sc->sc_key, carpr.carpr_key, sizeof(sc->sc_key));
                if (error > 0)
                        error = EINVAL;
                else {
                        error = 0;
                        carp_setrun(sc, 0);
                }
                break;

        case SIOCGVH:
                /* Report the carp configuration. */
                memset(&carpr, 0, sizeof(carpr));
                if (sc->sc_carpdev != NULL)
                        strlcpy(carpr.carpr_carpdev, sc->sc_carpdev->if_xname,
                            IFNAMSIZ);
                carpr.carpr_state = sc->sc_state;
                carpr.carpr_vhid = sc->sc_vhid;
                carpr.carpr_advbase = sc->sc_advbase;
                carpr.carpr_advskew = sc->sc_advskew;

                /*
                 * The key is only copied out when the caller passes the
                 * authorization check; otherwise it stays zeroed.  The
                 * check's result is then replaced by that of copyout().
                 */
                if ((l != NULL) && (error = kauth_authorize_network(l->l_cred,
                    KAUTH_NETWORK_INTERFACE,
                    KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
                    NULL)) == 0)
                        memcpy(carpr.carpr_key, sc->sc_key,
                            sizeof(carpr.carpr_key));
                error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
                break;

        case SIOCADDMULTI:
                error = carp_ether_addmulti(sc, ifr);
                break;

        case SIOCDELMULTI:
                error = carp_ether_delmulti(sc, ifr);
                break;

        case SIOCSIFCAP:
                /* ENETRESET from the common handler counts as success. */
                if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
                        error = 0;
                break;

        default:
                error = ether_ioctl(ifp, cmd, data);
        }

        carp_hmac_prepare(sc);
        return (error);
}
 2182 
 2183 
 2184 /*
 2185  * Start output on carp interface. This function should never be called.
 2186  */
 2187 static void
 2188 carp_start(struct ifnet *ifp)
 2189 {
 2190 #ifdef DEBUG
 2191         printf("%s: start called\n", ifp->if_xname);
 2192 #endif
 2193 }
 2194 
 2195 int
 2196 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa,
 2197     const struct rtentry *rt)
 2198 {
 2199         struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc);
 2200         KASSERT(KERNEL_LOCKED_P());
 2201 
 2202         if (sc->sc_carpdev != NULL && sc->sc_state == MASTER) {
 2203                 return if_output_lock(sc->sc_carpdev, ifp, m, sa, rt);
 2204         } else {
 2205                 m_freem(m);
 2206                 return (ENETUNREACH);
 2207         }
 2208 }
 2209 
 2210 static void
 2211 carp_set_state(struct carp_softc *sc, int state)
 2212 {
 2213         static const char *carp_states[] = { CARP_STATES };
 2214 
 2215         if (sc->sc_state == state)
 2216                 return;
 2217 
 2218         CARP_LOG(sc, ("state transition from: %s -> to: %s", carp_states[sc->sc_state], carp_states[state]));
 2219 
 2220         sc->sc_state = state;
 2221         carp_update_link_state(sc);
 2222 }
 2223 
 2224 static void
 2225 carp_update_link_state(struct carp_softc *sc)
 2226 {
 2227         int link_state;
 2228 
 2229         switch (sc->sc_state) {
 2230         case BACKUP:
 2231                 link_state = LINK_STATE_DOWN;
 2232                 break;
 2233         case MASTER:
 2234                 link_state = LINK_STATE_UP;
 2235                 break;
 2236         default:
 2237                 /* Not useable, so down makes perfect sense. */
 2238                 link_state = LINK_STATE_DOWN;
 2239                 break;
 2240         }
 2241         if_link_state_change(&sc->sc_if, link_state);
 2242 }
 2243 
 2244 void
 2245 carp_carpdev_state(void *v)
 2246 {
 2247         struct carp_if *cif;
 2248         struct carp_softc *sc;
 2249         struct ifnet *ifp = v;
 2250 
 2251         if (ifp->if_type == IFT_CARP)
 2252                 return;
 2253 
 2254         cif = (struct carp_if *)ifp->if_carp;
 2255 
 2256         TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
 2257                 int suppressed = sc->sc_suppress;
 2258 
 2259                 if (sc->sc_carpdev->if_link_state == LINK_STATE_DOWN ||
 2260                     !(sc->sc_carpdev->if_flags & IFF_UP)) {
 2261                         sc->sc_if.if_flags &= ~IFF_RUNNING;
 2262                         callout_stop(&sc->sc_ad_tmo);
 2263                         callout_stop(&sc->sc_md_tmo);
 2264                         callout_stop(&sc->sc_md6_tmo);
 2265                         carp_set_state(sc, INIT);
 2266                         sc->sc_suppress = 1;
 2267                         carp_setrun(sc, 0);
 2268                         if (!suppressed) {
 2269                                 carp_suppress_preempt++;
 2270                                 if (carp_suppress_preempt == 1)
 2271                                         carp_send_ad_all();
 2272                         }
 2273                 } else {
 2274                         carp_set_state(sc, INIT);
 2275                         sc->sc_suppress = 0;
 2276                         carp_setrun(sc, 0);
 2277                         if (suppressed)
 2278                                 carp_suppress_preempt--;
 2279                 }
 2280         }
 2281 }
 2282 
 2283 static int
 2284 carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr)
 2285 {
 2286         const struct sockaddr *sa = ifreq_getaddr(SIOCADDMULTI, ifr);
 2287         struct ifnet *ifp;
 2288         struct carp_mc_entry *mc;
 2289         u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
 2290         int error;
 2291 
 2292         ifp = sc->sc_carpdev;
 2293         if (ifp == NULL)
 2294                 return (EINVAL);
 2295 
 2296         error = ether_addmulti(sa, &sc->sc_ac);
 2297         if (error != ENETRESET)
 2298                 return (error);
 2299 
 2300         /*
 2301          * This is new multicast address.  We have to tell parent
 2302          * about it.  Also, remember this multicast address so that
 2303          * we can delete them on unconfigure.
 2304          */
 2305         mc = malloc(sizeof(struct carp_mc_entry), M_DEVBUF, M_NOWAIT);
 2306         if (mc == NULL) {
 2307                 error = ENOMEM;
 2308                 goto alloc_failed;
 2309         }
 2310 
 2311         /*
 2312          * As ether_addmulti() returns ENETRESET, following two
 2313          * statement shouldn't fail.
 2314          */
 2315         (void)ether_multiaddr(sa, addrlo, addrhi);
 2316 
 2317         ETHER_LOCK(&sc->sc_ac);
 2318         mc->mc_enm = ether_lookup_multi(addrlo, addrhi, &sc->sc_ac);
 2319         ETHER_UNLOCK(&sc->sc_ac);
 2320 
 2321         memcpy(&mc->mc_addr, sa, sa->sa_len);
 2322         LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries);
 2323 
 2324         error = if_mcast_op(ifp, SIOCADDMULTI, sa);
 2325         if (error != 0)
 2326                 goto ioctl_failed;
 2327 
 2328         return (error);
 2329 
 2330  ioctl_failed:
 2331         LIST_REMOVE(mc, mc_entries);
 2332         free(mc, M_DEVBUF);
 2333  alloc_failed:
 2334         (void)ether_delmulti(sa, &sc->sc_ac);
 2335 
 2336         return (error);
 2337 }
 2338 
 2339 static int
 2340 carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr)
 2341 {
 2342         const struct sockaddr *sa = ifreq_getaddr(SIOCDELMULTI, ifr);
 2343         struct ifnet *ifp;
 2344         struct ether_multi *enm;
 2345         struct carp_mc_entry *mc;
 2346         u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
 2347         int error;
 2348 
 2349         ifp = sc->sc_carpdev;
 2350         if (ifp == NULL)
 2351                 return (EINVAL);
 2352 
 2353         /*
 2354          * Find a key to lookup carp_mc_entry.  We have to do this
 2355          * before calling ether_delmulti for obvious reason.
 2356          */
 2357         if ((error = ether_multiaddr(sa, addrlo, addrhi)) != 0)
 2358                 return (error);
 2359 
 2360         ETHER_LOCK(&sc->sc_ac);
 2361         enm = ether_lookup_multi(addrlo, addrhi, &sc->sc_ac);
 2362         ETHER_UNLOCK(&sc->sc_ac);
 2363         if (enm == NULL)
 2364                 return (EINVAL);
 2365 
 2366         LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries)
 2367                 if (mc->mc_enm == enm)
 2368                         break;
 2369 
 2370         /* We won't delete entries we didn't add */
 2371         if (mc == NULL)
 2372                 return (EINVAL);
 2373 
 2374         error = ether_delmulti(sa, &sc->sc_ac);
 2375         if (error != ENETRESET)
 2376                 return (error);
 2377 
 2378         /* We no longer use this multicast address.  Tell parent so. */
 2379         error = if_mcast_op(ifp, SIOCDELMULTI, sa);
 2380         if (error == 0) {
 2381                 /* And forget about this address. */
 2382                 LIST_REMOVE(mc, mc_entries);
 2383                 free(mc, M_DEVBUF);
 2384         } else
 2385                 (void)ether_addmulti(sa, &sc->sc_ac);
 2386         return (error);
 2387 }
 2388 
 2389 /*
 2390  * Delete any multicast address we have asked to add from parent
 2391  * interface.  Called when the carp is being unconfigured.
 2392  */
 2393 static void
 2394 carp_ether_purgemulti(struct carp_softc *sc)
 2395 {
 2396         struct ifnet *ifp = sc->sc_carpdev;             /* Parent. */
 2397         struct carp_mc_entry *mc;
 2398 
 2399         if (ifp == NULL)
 2400                 return;
 2401 
 2402         while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) {
 2403                 (void)if_mcast_op(ifp, SIOCDELMULTI, sstosa(&mc->mc_addr));
 2404                 LIST_REMOVE(mc, mc_entries);
 2405                 free(mc, M_DEVBUF);
 2406         }
 2407 }
 2408 
 2409 static int
 2410 sysctl_net_inet_carp_stats(SYSCTLFN_ARGS)
 2411 {
 2412 
 2413         return (NETSTAT_SYSCTL(carpstat_percpu, CARP_NSTATS));
 2414 }
 2415 
 2416 void
 2417 carp_init(void)
 2418 {
 2419 
 2420         sysctl_net_inet_carp_setup(NULL);
 2421 #ifdef MBUFTRACE
 2422         MOWNER_ATTACH(&carp_proto_mowner_rx);
 2423         MOWNER_ATTACH(&carp_proto_mowner_tx);
 2424         MOWNER_ATTACH(&carp_proto6_mowner_rx);
 2425         MOWNER_ATTACH(&carp_proto6_mowner_tx);
 2426 #endif
 2427 
 2428         carp_wqinput = wqinput_create("carp", _carp_proto_input);
 2429 #ifdef INET6
 2430         carp6_wqinput = wqinput_create("carp6", _carp6_proto_input);
 2431 #endif
 2432 }
 2433 
 2434 static void
 2435 sysctl_net_inet_carp_setup(struct sysctllog **clog)
 2436 {
 2437 
 2438         sysctl_createv(clog, 0, NULL, NULL,
 2439                        CTLFLAG_PERMANENT,
 2440                        CTLTYPE_NODE, "inet", NULL,
 2441                        NULL, 0, NULL, 0,
 2442                        CTL_NET, PF_INET, CTL_EOL);
 2443         sysctl_createv(clog, 0, NULL, NULL,
 2444                        CTLFLAG_PERMANENT,
 2445                        CTLTYPE_NODE, "carp",
 2446                        SYSCTL_DESCR("CARP related settings"),
 2447                        NULL, 0, NULL, 0,
 2448                        CTL_NET, PF_INET, IPPROTO_CARP, CTL_EOL);
 2449 
 2450         sysctl_createv(clog, 0, NULL, NULL,
 2451                        CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 2452                        CTLTYPE_INT, "preempt",
 2453                        SYSCTL_DESCR("Enable CARP Preempt"),
 2454                        NULL, 0, &carp_opts[CARPCTL_PREEMPT], 0,
 2455                        CTL_NET, PF_INET, IPPROTO_CARP,
 2456                        CTL_CREATE, CTL_EOL);
 2457         sysctl_createv(clog, 0, NULL, NULL,
 2458                        CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 2459                        CTLTYPE_INT, "arpbalance",
 2460                        SYSCTL_DESCR("Enable ARP balancing"),
 2461                        NULL, 0, &carp_opts[CARPCTL_ARPBALANCE], 0,
 2462                        CTL_NET, PF_INET, IPPROTO_CARP,
 2463                        CTL_CREATE, CTL_EOL);
 2464         sysctl_createv(clog, 0, NULL, NULL,
 2465                        CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 2466                        CTLTYPE_INT, "allow",
 2467                        SYSCTL_DESCR("Enable CARP"),
 2468                        NULL, 0, &carp_opts[CARPCTL_ALLOW], 0,
 2469                        CTL_NET, PF_INET, IPPROTO_CARP,
 2470                        CTL_CREATE, CTL_EOL);
 2471         sysctl_createv(clog, 0, NULL, NULL,
 2472                        CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 2473                        CTLTYPE_INT, "log",
 2474                        SYSCTL_DESCR("CARP logging"),
 2475                        NULL, 0, &carp_opts[CARPCTL_LOG], 0,
 2476                        CTL_NET, PF_INET, IPPROTO_CARP,
 2477                        CTL_CREATE, CTL_EOL);
 2478         sysctl_createv(clog, 0, NULL, NULL,
 2479                        CTLFLAG_PERMANENT,
 2480                        CTLTYPE_STRUCT, "stats",
 2481                        SYSCTL_DESCR("CARP statistics"),
 2482                        sysctl_net_inet_carp_stats, 0, NULL, 0,
 2483                        CTL_NET, PF_INET, IPPROTO_CARP, CARPCTL_STATS,
 2484                        CTL_EOL);
 2485 }

Cache object: d57a280abb45d3b44c02740463f1bc27


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.