[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/ip_carp.c

Version: -  FREEBSD  -  FREEBSD7  -  FREEBSD70  -  FREEBSD6  -  FREEBSD64  -  FREEBSD63  -  FREEBSD62  -  FREEBSD61  -  FREEBSD60  -  FREEBSD5  -  FREEBSD55  -  FREEBSD54  -  FREEBSD53  -  FREEBSD52  -  FREEBSD51  -  FREEBSD50  -  FREEBSD4  -  FREEBSD3  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  OPENSOLARIS  -  minix-3-1-1  -  TRUSTEDBSD-SEBSD  -  FREEBSD-LIBC  -  FREEBSD7-LIBC  -  FREEBSD6-LIBC  -  GLIBC27 
SearchContext: -  none  -  excerpts  -  bigexcerpts 

  1 /*
  2  * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
  3  * Copyright (c) 2003 Ryan McBride. All rights reserved.
  4  *
  5  * Redistribution and use in source and binary forms, with or without
  6  * modification, are permitted provided that the following conditions
  7  * are met:
  8  * 1. Redistributions of source code must retain the above copyright
  9  *    notice, this list of conditions and the following disclaimer.
 10  * 2. Redistributions in binary form must reproduce the above copyright
 11  *    notice, this list of conditions and the following disclaimer in the
 12  *    documentation and/or other materials provided with the distribution.
 13  *
 14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 17  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
 18  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 19  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 20  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 22  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 23  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 24  * THE POSSIBILITY OF SUCH DAMAGE.
 25  */
 26 
 27 #include <sys/cdefs.h>
 28 __FBSDID("$FreeBSD: src/sys/netinet/ip_carp.c,v 1.59 2008/12/02 21:37:28 bz Exp $");
 29 
 30 #include "opt_carp.h"
 31 #include "opt_bpf.h"
 32 #include "opt_inet.h"
 33 #include "opt_inet6.h"
 34 
 35 #include <sys/types.h>
 36 #include <sys/param.h>
 37 #include <sys/systm.h>
 38 #include <sys/conf.h>
 39 #include <sys/kernel.h>
 40 #include <sys/limits.h>
 41 #include <sys/malloc.h>
 42 #include <sys/mbuf.h>
 43 #include <sys/module.h>
 44 #include <sys/time.h>
 45 #include <sys/priv.h>
 46 #include <sys/proc.h>
 47 #include <sys/sysctl.h>
 48 #include <sys/syslog.h>
 49 #include <sys/signalvar.h>
 50 #include <sys/filio.h>
 51 #include <sys/sockio.h>
 52 
 53 #include <sys/socket.h>
 54 #include <sys/vnode.h>
 55 #include <sys/vimage.h>
 56 
 57 #include <machine/stdarg.h>
 58 
 59 #include <net/bpf.h>
 60 #include <net/ethernet.h>
 61 #include <net/fddi.h>
 62 #include <net/iso88025.h>
 63 #include <net/if.h>
 64 #include <net/if_clone.h>
 65 #include <net/if_dl.h>
 66 #include <net/if_types.h>
 67 #include <net/route.h>
 68 
 69 #ifdef INET
 70 #include <netinet/in.h>
 71 #include <netinet/in_var.h>
 72 #include <netinet/in_systm.h>
 73 #include <netinet/ip.h>
 74 #include <netinet/ip_var.h>
 75 #include <netinet/if_ether.h>
 76 #include <machine/in_cksum.h>
 77 #include <netinet/vinet.h>
 78 #endif
 79 
 80 #ifdef INET6
 81 #include <netinet/icmp6.h>
 82 #include <netinet/ip6.h>
 83 #include <netinet6/ip6_var.h>
 84 #include <netinet6/scope6_var.h>
 85 #include <netinet6/nd6.h>
 86 #include <netinet6/vinet6.h>
 87 #endif
 88 
 89 #include <crypto/sha1.h>
 90 #include <netinet/ip_carp.h>
 91 
 /* Base name handed to if_initname() when a carp interface is cloned. */
 92 #define CARP_IFNAME     "carp"
 /* malloc(9) type used for the softc and its multicast membership array. */
 93 static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces");
 /* Parent sysctl node for the net.inet.carp knobs declared further down. */
 94 SYSCTL_DECL(_net_inet_carp);
 95 
 /*
  * Per-instance state of one carp pseudo-interface.  Allocated zeroed in
  * carp_clone_create() and freed in carp_clone_destroy().
  */
 96 struct carp_softc {
 97         struct ifnet            *sc_ifp;        /* our own ifnet (from if_alloc) */
 98         struct ifnet            *sc_carpdev;    /* Pointer to parent interface; NULL while detached */
 99         struct in_ifaddr        *sc_ia;         /* primary iface address */
100         struct ip_moptions       sc_imo;        /* IPv4 multicast options */
101 #ifdef INET6
102         struct in6_ifaddr       *sc_ia6;        /* primary iface address v6 */
103         struct ip6_moptions      sc_im6o;       /* IPv6 multicast options */
104 #endif /* INET6 */
105         TAILQ_ENTRY(carp_softc)  sc_list;       /* linkage on parent's carp_if vhif_vrs */
106 
107         enum { INIT = 0, BACKUP, MASTER }       sc_state;
108 
109         int                      sc_flags_backup;
110         int                      sc_suppress;   /* nonzero: counted in carp_suppress_preempt */
111 
112         int                      sc_sendad_errors;
        /* at or above this error count the softc is counted in carp_suppress_preempt */
113 #define CARP_SENDAD_MAX_ERRORS  3
114         int                      sc_sendad_success;
115 #define CARP_SENDAD_MIN_SUCCESS 3
116 
117         int                      sc_vhid;       /* -1 until configured; low 8 bits go into the HMAC */
118         int                      sc_advskew;    /* converted to usec as advskew * 1000000 / 256 */
119         int                      sc_naddrs;
120         int                      sc_naddrs6;
121         int                      sc_advbase;    /* seconds */
122         int                      sc_init_counter; /* nonzero: next ad picks a random counter */
123         u_int64_t                sc_counter;    /* last counter sent or accepted */
124 
125         /* authentication */
126 #define CARP_HMAC_PAD   64
127         unsigned char sc_key[CARP_KEY_LEN];
128         unsigned char sc_pad[CARP_HMAC_PAD];    /* key XOR ipad while preparing, opad afterwards */
129         SHA1_CTX sc_sha1;                       /* inner hash precomputed by carp_hmac_prepare() */
130 
131         struct callout           sc_ad_tmo;     /* advertisement timeout */
132         struct callout           sc_md_tmo;     /* master down timeout */
133         struct callout           sc_md6_tmo;    /* master down timeout (presumably IPv6 -- confirm in carp_setrun) */
134         
135         LIST_ENTRY(carp_softc)   sc_next;       /* linkage on the global carpif_list */
136 };
 /* Map a softc to its own (carp) ifnet. */
137 #define SC2IFP(sc)      ((sc)->sc_ifp)
138 
/*
 * Global preemption-suppression count.  While it is nonzero carp_input_c()
 * treats a local advskew below 240 as 240; carpdetach() decrements it for
 * a softc that had sc_suppress set or too many send errors.
 */
139 int carp_suppress_preempt = 0;
/* Tunables indexed by the CARPCTL_* ids; exported through the sysctls below. */
140 int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 };    /* XXX for now */
141 SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
142     &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
143 SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
144     &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
/* CARP_LOG() fires when this is > 0, CARP_DEBUG() when it is > 1. */
145 SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
146     &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
147 SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
148     &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");
149 SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
150     &carp_suppress_preempt, 0, "Preemption is suppressed");
151 
/* Packet and error counters bumped on the input paths. */
152 struct carpstats carpstats;
153 SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
154     &carpstats, carpstats,
155     "CARP statistics (struct carpstats, netinet/ip_carp.h)");
156 
/*
 * Per-parent-interface state, reached through ifnet->if_carp.  carpdetach()
 * frees it when the last attached softc goes away.
 */
157 struct carp_if {
158         TAILQ_HEAD(, carp_softc) vhif_vrs;      /* softcs attached to this parent */
159         int vhif_nvrs;                          /* number of entries on vhif_vrs */
160 
161         struct ifnet    *vhif_ifp;
162         struct mtx       vhif_mtx;              /* taken via CARP_LOCK()/CARP_SCLOCK() */
163 };
164 
165 /* Get carp_if from softc. Valid after carp_set_addr{,6}. */
    /* Dereferences sc_carpdev, so callers must make sure it is not NULL. */
166 #define SC2CIF(sc)              ((struct carp_if *)(sc)->sc_carpdev->if_carp)
167 
168 /* lock per carp_if queue */
169 #define CARP_LOCK_INIT(cif)     mtx_init(&(cif)->vhif_mtx, "carp_if",   \
170         NULL, MTX_DEF)
171 #define CARP_LOCK_DESTROY(cif)  mtx_destroy(&(cif)->vhif_mtx)
172 #define CARP_LOCK_ASSERT(cif)   mtx_assert(&(cif)->vhif_mtx, MA_OWNED)
173 #define CARP_LOCK(cif)          mtx_lock(&(cif)->vhif_mtx)
174 #define CARP_UNLOCK(cif)        mtx_unlock(&(cif)->vhif_mtx)
175 
    /* Same mutex, reached from a softc through SC2CIF(). */
176 #define CARP_SCLOCK(sc)         mtx_lock(&SC2CIF(sc)->vhif_mtx)
177 #define CARP_SCUNLOCK(sc)       mtx_unlock(&SC2CIF(sc)->vhif_mtx)
178 #define CARP_SCLOCK_ASSERT(sc)  mtx_assert(&SC2CIF(sc)->vhif_mtx, MA_OWNED)
179 
    /* LOG_INFO message, emitted only when the "log" tunable is > 0. */
180 #define CARP_LOG(...)   do {                            \
181         if (carp_opts[CARPCTL_LOG] > 0)                 \
182                 log(LOG_INFO, __VA_ARGS__);             \
183 } while (0)
184 
    /* LOG_DEBUG message, emitted only when the "log" tunable is > 1. */
185 #define CARP_DEBUG(...) do {                            \
186         if (carp_opts[CARPCTL_LOG] > 1)                 \
187                 log(LOG_DEBUG, __VA_ARGS__);            \
188 } while (0)
189 
/* Forward declarations for the file-local helpers. */
190 static void     carp_hmac_prepare(struct carp_softc *);
191 static void     carp_hmac_generate(struct carp_softc *, u_int32_t *,
192                     unsigned char *);
193 static int      carp_hmac_verify(struct carp_softc *, u_int32_t *,
194                     unsigned char *);
195 static void     carp_setroute(struct carp_softc *, int);
196 static void     carp_input_c(struct mbuf *, struct carp_header *, sa_family_t);
197 static int      carp_clone_create(struct if_clone *, int, caddr_t);
198 static void     carp_clone_destroy(struct ifnet *);
199 static void     carpdetach(struct carp_softc *, int);
200 static int      carp_prepare_ad(struct mbuf *, struct carp_softc *,
201                     struct carp_header *);
202 static void     carp_send_ad_all(void);
203 static void     carp_send_ad(void *);
204 static void     carp_send_ad_locked(struct carp_softc *);
205 static void     carp_send_arp(struct carp_softc *);
206 static void     carp_master_down(void *);
207 static void     carp_master_down_locked(struct carp_softc *);
208 static int      carp_ioctl(struct ifnet *, u_long, caddr_t);
209 static int      carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *,
210                     struct rtentry *);
211 static void     carp_start(struct ifnet *);
212 static void     carp_setrun(struct carp_softc *, sa_family_t);
213 static void     carp_set_state(struct carp_softc *, int);
214 static int      carp_addrcount(struct carp_if *, struct in_ifaddr *, int);
    /* Selector passed as the last argument of carp_addrcount(). */
215 enum    { CARP_COUNT_MASTER, CARP_COUNT_RUNNING };
216 
217 static void     carp_multicast_cleanup(struct carp_softc *);
218 static int      carp_set_addr(struct carp_softc *, struct sockaddr_in *);
219 static int      carp_del_addr(struct carp_softc *, struct sockaddr_in *);
220 static void     carp_carpdev_state_locked(struct carp_if *);
221 static void     carp_sc_state_locked(struct carp_softc *);
222 #ifdef INET6
223 static void     carp_send_na(struct carp_softc *);
224 static int      carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
225 static int      carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *);
226 static void     carp_multicast6_cleanup(struct carp_softc *);
227 #endif
228 
    /* Every carp softc, linked through sc_next; modified under carp_mtx. */
229 static LIST_HEAD(, carp_softc) carpif_list;
230 static struct mtx carp_mtx;
231 IFC_SIMPLE_DECLARE(carp, 0);
232 
233 static eventhandler_tag if_detach_event_tag;
234 
235 static __inline u_int16_t
236 carp_cksum(struct mbuf *m, int len)
237 {
238         return (in_cksum(m, len));
239 }
240 
241 static void
242 carp_hmac_prepare(struct carp_softc *sc)
243 {
244         u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
245         u_int8_t vhid = sc->sc_vhid & 0xff;
246         struct ifaddr *ifa;
247         int i, found;
248 #ifdef INET
249         struct in_addr last, cur, in;
250 #endif
251 #ifdef INET6
252         struct in6_addr last6, cur6, in6;
253 #endif
254 
255         if (sc->sc_carpdev)
256                 CARP_SCLOCK(sc);
257 
258         /* XXX: possible race here */
259 
260         /* compute ipad from key */
261         bzero(sc->sc_pad, sizeof(sc->sc_pad));
262         bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
263         for (i = 0; i < sizeof(sc->sc_pad); i++)
264                 sc->sc_pad[i] ^= 0x36;
265 
266         /* precompute first part of inner hash */
267         SHA1Init(&sc->sc_sha1);
268         SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
269         SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
270         SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
271         SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
272 #ifdef INET
273         cur.s_addr = 0;
274         do {
275                 found = 0;
276                 last = cur;
277                 cur.s_addr = 0xffffffff;
278                 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
279                         in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
280                         if (ifa->ifa_addr->sa_family == AF_INET &&
281                             ntohl(in.s_addr) > ntohl(last.s_addr) &&
282                             ntohl(in.s_addr) < ntohl(cur.s_addr)) {
283                                 cur.s_addr = in.s_addr;
284                                 found++;
285                         }
286                 }
287                 if (found)
288                         SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur));
289         } while (found);
290 #endif /* INET */
291 #ifdef INET6
292         memset(&cur6, 0, sizeof(cur6));
293         do {
294                 found = 0;
295                 last6 = cur6;
296                 memset(&cur6, 0xff, sizeof(cur6));
297                 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
298                         in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
299                         if (IN6_IS_SCOPE_EMBED(&in6))
300                                 in6.s6_addr16[1] = 0;
301                         if (ifa->ifa_addr->sa_family == AF_INET6 &&
302                             memcmp(&in6, &last6, sizeof(in6)) > 0 &&
303                             memcmp(&in6, &cur6, sizeof(in6)) < 0) {
304                                 cur6 = in6;
305                                 found++;
306                         }
307                 }
308                 if (found)
309                         SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6));
310         } while (found);
311 #endif /* INET6 */
312 
313         /* convert ipad to opad */
314         for (i = 0; i < sizeof(sc->sc_pad); i++)
315                 sc->sc_pad[i] ^= 0x36 ^ 0x5c;
316 
317         if (sc->sc_carpdev)
318                 CARP_SCUNLOCK(sc);
319 }
320 
321 static void
322 carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2],
323     unsigned char md[20])
324 {
325         SHA1_CTX sha1ctx;
326 
327         /* fetch first half of inner hash */
328         bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
329 
330         SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
331         SHA1Final(md, &sha1ctx);
332 
333         /* outer hash */
334         SHA1Init(&sha1ctx);
335         SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
336         SHA1Update(&sha1ctx, md, 20);
337         SHA1Final(md, &sha1ctx);
338 }
339 
340 static int
341 carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2],
342     unsigned char md[20])
343 {
344         unsigned char md2[20];
345 
346         CARP_SCLOCK_ASSERT(sc);
347 
348         carp_hmac_generate(sc, counter, md2);
349 
350         return (bcmp(md, md2, sizeof(md2)));
351 }
352 
353 static void
354 carp_setroute(struct carp_softc *sc, int cmd)
355 {
356         struct ifaddr *ifa;
357         int s;
358 
359         if (sc->sc_carpdev)
360                 CARP_SCLOCK_ASSERT(sc);
361 
362         s = splnet();
363         TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
364                 if (ifa->ifa_addr->sa_family == AF_INET &&
365                     sc->sc_carpdev != NULL) {
366                         int count = carp_addrcount(
367                             (struct carp_if *)sc->sc_carpdev->if_carp,
368                             ifatoia(ifa), CARP_COUNT_MASTER);
369 
370                         if ((cmd == RTM_ADD && count == 1) ||
371                             (cmd == RTM_DELETE && count == 0))
372                                 rtinit(ifa, cmd, RTF_UP | RTF_HOST);
373                 }
374 #ifdef INET6
375                 if (ifa->ifa_addr->sa_family == AF_INET6) {
376                         if (cmd == RTM_ADD)
377                                 in6_ifaddloop(ifa);
378                         else
379                                 in6_ifremloop(ifa);
380                 }
381 #endif /* INET6 */
382         }
383         splx(s);
384 }
385 
386 static int
387 carp_clone_create(struct if_clone *ifc, int unit, caddr_t params)
388 {
389 
390         struct carp_softc *sc;
391         struct ifnet *ifp;
392 
393         sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO);
394         ifp = SC2IFP(sc) = if_alloc(IFT_ETHER);
395         if (ifp == NULL) {
396                 free(sc, M_CARP);
397                 return (ENOSPC);
398         }
399         
400         sc->sc_flags_backup = 0;
401         sc->sc_suppress = 0;
402         sc->sc_advbase = CARP_DFLTINTV;
403         sc->sc_vhid = -1;       /* required setting */
404         sc->sc_advskew = 0;
405         sc->sc_init_counter = 1;
406         sc->sc_naddrs = sc->sc_naddrs6 = 0; /* M_ZERO? */
407 #ifdef INET6
408         sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
409 #endif
410         sc->sc_imo.imo_membership = (struct in_multi **)malloc(
411             (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP,
412             M_WAITOK);
413         sc->sc_imo.imo_mfilters = NULL;
414         sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
415         sc->sc_imo.imo_multicast_vif = -1;
416 
417         callout_init(&sc->sc_ad_tmo, CALLOUT_MPSAFE);
418         callout_init(&sc->sc_md_tmo, CALLOUT_MPSAFE);
419         callout_init(&sc->sc_md6_tmo, CALLOUT_MPSAFE);
420         
421         ifp->if_softc = sc;
422         if_initname(ifp, CARP_IFNAME, unit);
423         ifp->if_mtu = ETHERMTU;
424         ifp->if_flags = IFF_LOOPBACK;
425         ifp->if_ioctl = carp_ioctl;
426         ifp->if_output = carp_looutput;
427         ifp->if_start = carp_start;
428         ifp->if_type = IFT_CARP;
429         ifp->if_snd.ifq_maxlen = ifqmaxlen;
430         ifp->if_hdrlen = 0;
431         if_attach(ifp);
432         bpfattach(SC2IFP(sc), DLT_NULL, sizeof(u_int32_t));
433         mtx_lock(&carp_mtx);
434         LIST_INSERT_HEAD(&carpif_list, sc, sc_next);
435         mtx_unlock(&carp_mtx);
436         return (0);
437 }
438 
439 static void
440 carp_clone_destroy(struct ifnet *ifp)
441 {
442         struct carp_softc *sc = ifp->if_softc;
443 
444         if (sc->sc_carpdev)
445                 CARP_SCLOCK(sc);
446         carpdetach(sc, 1);      /* Returns unlocked. */
447 
448         mtx_lock(&carp_mtx);
449         LIST_REMOVE(sc, sc_next);
450         mtx_unlock(&carp_mtx);
451         bpfdetach(ifp);
452         if_detach(ifp);
453         if_free_type(ifp, IFT_ETHER);
454         free(sc->sc_imo.imo_membership, M_CARP);
455         free(sc, M_CARP);
456 }
457 
458 /*
459  * This function can be called on CARP interface destroy path,
460  * and in case of the removal of the underlying interface as
461  * well. We differentiate these two cases. In the latter case
462  * we do not cleanup our multicast memberships, since they
463  * are already freed. Also, in the latter case we do not
464  * release the lock on return, because the function will be
465  * called once more, for another CARP instance on the same
466  * interface.
467  */
468 static void
469 carpdetach(struct carp_softc *sc, int unlock)
470 {
471         struct carp_if *cif;
472 
473         callout_stop(&sc->sc_ad_tmo);
474         callout_stop(&sc->sc_md_tmo);
475         callout_stop(&sc->sc_md6_tmo);
476 
477         if (sc->sc_suppress)
478                 carp_suppress_preempt--;
479         sc->sc_suppress = 0;
480 
481         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
482                 carp_suppress_preempt--;
483         sc->sc_sendad_errors = 0;
484 
485         carp_set_state(sc, INIT);
486         SC2IFP(sc)->if_flags &= ~IFF_UP;
487         carp_setrun(sc, 0);
488         if (unlock)
489                 carp_multicast_cleanup(sc);
490 #ifdef INET6
491         carp_multicast6_cleanup(sc);
492 #endif
493 
494         if (sc->sc_carpdev != NULL) {
495                 cif = (struct carp_if *)sc->sc_carpdev->if_carp;
496                 CARP_LOCK_ASSERT(cif);
497                 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
498                 if (!--cif->vhif_nvrs) {
499                         ifpromisc(sc->sc_carpdev, 0);
500                         sc->sc_carpdev->if_carp = NULL;
501                         CARP_LOCK_DESTROY(cif);
502                         free(cif, M_IFADDR);
503                 } else if (unlock)
504                         CARP_UNLOCK(cif);
505                 sc->sc_carpdev = NULL;
506         }
507 }
508 
509 /* Detach an interface from the carp. */
510 static void
511 carp_ifdetach(void *arg __unused, struct ifnet *ifp)
512 {
513         struct carp_if *cif = (struct carp_if *)ifp->if_carp;
514         struct carp_softc *sc, *nextsc;
515 
516         if (cif == NULL)
517                 return;
518 
519         /*
520          * XXX: At the end of for() cycle the lock will be destroyed.
521          */
522         CARP_LOCK(cif);
523         for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) {
524                 nextsc = TAILQ_NEXT(sc, sc_list);
525                 carpdetach(sc, 0);
526         }
527 }
528 
529 /*
530  * process input packet.
531  * we have rearranged checks order compared to the rfc,
532  * but it seems more efficient this way or not possible otherwise.
533  */
534 void
535 carp_input(struct mbuf *m, int hlen)
536 {
537         struct ip *ip = mtod(m, struct ip *);
538         struct carp_header *ch;
539         int iplen, len;
540 
541         carpstats.carps_ipackets++;
542 
543         if (!carp_opts[CARPCTL_ALLOW]) {
544                 m_freem(m);
545                 return;
546         }
547 
548         /* check if received on a valid carp interface */
549         if (m->m_pkthdr.rcvif->if_carp == NULL) {
550                 carpstats.carps_badif++;
551                 CARP_LOG("carp_input: packet received on non-carp "
552                     "interface: %s\n",
553                     m->m_pkthdr.rcvif->if_xname);
554                 m_freem(m);
555                 return;
556         }
557 
558         /* verify that the IP TTL is 255.  */
559         if (ip->ip_ttl != CARP_DFLTTL) {
560                 carpstats.carps_badttl++;
561                 CARP_LOG("carp_input: received ttl %d != 255i on %s\n",
562                     ip->ip_ttl,
563                     m->m_pkthdr.rcvif->if_xname);
564                 m_freem(m);
565                 return;
566         }
567 
568         iplen = ip->ip_hl << 2;
569 
570         if (m->m_pkthdr.len < iplen + sizeof(*ch)) {
571                 carpstats.carps_badlen++;
572                 CARP_LOG("carp_input: received len %zd < "
573                     "sizeof(struct carp_header)\n",
574                     m->m_len - sizeof(struct ip));
575                 m_freem(m);
576                 return;
577         }
578 
579         if (iplen + sizeof(*ch) < m->m_len) {
580                 if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) {
581                         carpstats.carps_hdrops++;
582                         CARP_LOG("carp_input: pullup failed\n");
583                         return;
584                 }
585                 ip = mtod(m, struct ip *);
586         }
587         ch = (struct carp_header *)((char *)ip + iplen);
588 
589         /*
590          * verify that the received packet length is
591          * equal to the CARP header
592          */
593         len = iplen + sizeof(*ch);
594         if (len > m->m_pkthdr.len) {
595                 carpstats.carps_badlen++;
596                 CARP_LOG("carp_input: packet too short %d on %s\n",
597                     m->m_pkthdr.len,
598                     m->m_pkthdr.rcvif->if_xname);
599                 m_freem(m);
600                 return;
601         }
602 
603         if ((m = m_pullup(m, len)) == NULL) {
604                 carpstats.carps_hdrops++;
605                 return;
606         }
607         ip = mtod(m, struct ip *);
608         ch = (struct carp_header *)((char *)ip + iplen);
609 
610         /* verify the CARP checksum */
611         m->m_data += iplen;
612         if (carp_cksum(m, len - iplen)) {
613                 carpstats.carps_badsum++;
614                 CARP_LOG("carp_input: checksum failed on %s\n",
615                     m->m_pkthdr.rcvif->if_xname);
616                 m_freem(m);
617                 return;
618         }
619         m->m_data -= iplen;
620 
621         carp_input_c(m, ch, AF_INET);
622 }
623 
#ifdef INET6
/*
 * IPv6 entry point.  `*mp' is the packet and `*offp' the offset of the carp
 * header inside it; `proto' is not used.  Always returns IPPROTO_DONE and
 * always consumes the mbuf.
 *
 * Checks, in order: input allowed by sysctl, receiving interface has carp
 * state, hop limit is 255, a full carp header is present, carp checksum.
 */
int
carp6_input(struct mbuf **mp, int *offp, int proto)
{
        struct mbuf *m = *mp;
        struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
        struct ifnet *rcvif = m->m_pkthdr.rcvif;
        struct carp_header *ch;
        u_int len;

        carpstats.carps_ipackets6++;

        if (!carp_opts[CARPCTL_ALLOW])
                goto drop;

        /* check if received on a valid carp interface */
        if (rcvif->if_carp == NULL) {
                carpstats.carps_badif++;
                CARP_LOG("carp6_input: packet received on non-carp "
                    "interface: %s\n",
                    rcvif->if_xname);
                goto drop;
        }

        /* verify that the IP TTL is 255 */
        if (ip6->ip6_hlim != CARP_DFLTTL) {
                carpstats.carps_badttl++;
                CARP_LOG("carp6_input: received ttl %d != 255 on %s\n",
                    ip6->ip6_hlim,
                    rcvif->if_xname);
                goto drop;
        }

        /*
         * verify that we have a complete carp packet; the length is
         * remembered up front because it is only needed for the log line
         * on failure, where this path returns without freeing `m'
         */
        len = m->m_len;
        IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
        if (ch == NULL) {
                carpstats.carps_badlen++;
                CARP_LOG("carp6_input: packet size %u too small\n", len);
                return (IPPROTO_DONE);
        }

        /* verify the CARP checksum */
        m->m_data += *offp;
        if (carp_cksum(m, sizeof(*ch))) {
                carpstats.carps_badsum++;
                CARP_LOG("carp6_input: checksum failed, on %s\n",
                    rcvif->if_xname);
                goto drop;
        }
        m->m_data -= *offp;

        carp_input_c(m, ch, AF_INET6);
        return (IPPROTO_DONE);

drop:
        m_freem(m);
        return (IPPROTO_DONE);
}
#endif /* INET6 */
685 
686 static void
687 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
688 {
689         struct ifnet *ifp = m->m_pkthdr.rcvif;
690         struct carp_softc *sc;
691         u_int64_t tmp_counter;
692         struct timeval sc_tv, ch_tv;
693 
694         /* verify that the VHID is valid on the receiving interface */
695         CARP_LOCK(ifp->if_carp);
696         TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list)
697                 if (sc->sc_vhid == ch->carp_vhid)
698                         break;
699 
700         if (!sc || !((SC2IFP(sc)->if_flags & IFF_UP) &&
701             (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) {
702                 carpstats.carps_badvhid++;
703                 CARP_UNLOCK(ifp->if_carp);
704                 m_freem(m);
705                 return;
706         }
707 
708         getmicrotime(&SC2IFP(sc)->if_lastchange);
709         SC2IFP(sc)->if_ipackets++;
710         SC2IFP(sc)->if_ibytes += m->m_pkthdr.len;
711 
712         if (bpf_peers_present(SC2IFP(sc)->if_bpf)) {
713                 struct ip *ip = mtod(m, struct ip *);
714                 uint32_t af1 = af;
715 
716                 /* BPF wants net byte order */
717                 ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2));
718                 ip->ip_off = htons(ip->ip_off);
719                 bpf_mtap2(SC2IFP(sc)->if_bpf, &af1, sizeof(af1), m);
720         }
721 
722         /* verify the CARP version. */
723         if (ch->carp_version != CARP_VERSION) {
724                 carpstats.carps_badver++;
725                 SC2IFP(sc)->if_ierrors++;
726                 CARP_UNLOCK(ifp->if_carp);
727                 CARP_LOG("%s; invalid version %d\n",
728                     SC2IFP(sc)->if_xname,
729                     ch->carp_version);
730                 m_freem(m);
731                 return;
732         }
733 
734         /* verify the hash */
735         if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
736                 carpstats.carps_badauth++;
737                 SC2IFP(sc)->if_ierrors++;
738                 CARP_UNLOCK(ifp->if_carp);
739                 CARP_LOG("%s: incorrect hash\n", SC2IFP(sc)->if_xname);
740                 m_freem(m);
741                 return;
742         }
743 
744         tmp_counter = ntohl(ch->carp_counter[0]);
745         tmp_counter = tmp_counter<<32;
746         tmp_counter += ntohl(ch->carp_counter[1]);
747 
748         /* XXX Replay protection goes here */
749 
750         sc->sc_init_counter = 0;
751         sc->sc_counter = tmp_counter;
752 
753         sc_tv.tv_sec = sc->sc_advbase;
754         if (carp_suppress_preempt && sc->sc_advskew <  240)
755                 sc_tv.tv_usec = 240 * 1000000 / 256;
756         else
757                 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
758         ch_tv.tv_sec = ch->carp_advbase;
759         ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
760 
761         switch (sc->sc_state) {
762         case INIT:
763                 break;
764         case MASTER:
765                 /*
766                  * If we receive an advertisement from a master who's going to
767                  * be more frequent than us, go into BACKUP state.
768                  */
769                 if (timevalcmp(&sc_tv, &ch_tv, >) ||
770                     timevalcmp(&sc_tv, &ch_tv, ==)) {
771                         callout_stop(&sc->sc_ad_tmo);
772                         CARP_DEBUG("%s: MASTER -> BACKUP "
773                            "(more frequent advertisement received)\n",
774                            SC2IFP(sc)->if_xname);
775                         carp_set_state(sc, BACKUP);
776                         carp_setrun(sc, 0);
777                         carp_setroute(sc, RTM_DELETE);
778                 }
779                 break;
780         case BACKUP:
781                 /*
782                  * If we're pre-empting masters who advertise slower than us,
783                  * and this one claims to be slower, treat him as down.
784                  */
785                 if (carp_opts[CARPCTL_PREEMPT] &&
786                     timevalcmp(&sc_tv, &ch_tv, <)) {
787                         CARP_DEBUG("%s: BACKUP -> MASTER "
788                             "(preempting a slower master)\n",
789                             SC2IFP(sc)->if_xname);
790                         carp_master_down_locked(sc);
791                         break;
792                 }
793 
794                 /*
795                  *  If the master is going to advertise at such a low frequency
796                  *  that he's guaranteed to time out, we'd might as well just
797                  *  treat him as timed out now.
798                  */
799                 sc_tv.tv_sec = sc->sc_advbase * 3;
800                 if (timevalcmp(&sc_tv, &ch_tv, <)) {
801                         CARP_DEBUG("%s: BACKUP -> MASTER "
802                             "(master timed out)\n",
803                             SC2IFP(sc)->if_xname);
804                         carp_master_down_locked(sc);
805                         break;
806                 }
807 
808                 /*
809                  * Otherwise, we reset the counter and wait for the next
810                  * advertisement.
811                  */
812                 carp_setrun(sc, af);
813                 break;
814         }
815 
816         CARP_UNLOCK(ifp->if_carp);
817 
818         m_freem(m);
819         return;
820 }
821 
822 static int
823 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
824 {
825         struct m_tag *mtag;
826         struct ifnet *ifp = SC2IFP(sc);
827 
828         if (sc->sc_init_counter) {
829                 /* this could also be seconds since unix epoch */
830                 sc->sc_counter = arc4random();
831                 sc->sc_counter = sc->sc_counter << 32;
832                 sc->sc_counter += arc4random();
833         } else
834                 sc->sc_counter++;
835 
836         ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
837         ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
838 
839         carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
840 
841         /* Tag packet for carp_output */
842         mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), M_NOWAIT);
843         if (mtag == NULL) {
844                 m_freem(m);
845                 SC2IFP(sc)->if_oerrors++;
846                 return (ENOMEM);
847         }
848         bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *));
849         m_tag_prepend(m, mtag);
850 
851         return (0);
852 }
853 
854 static void
855 carp_send_ad_all(void)
856 {
857         struct carp_softc *sc;
858 
859         mtx_lock(&carp_mtx);
860         LIST_FOREACH(sc, &carpif_list, sc_next) {
861                 if (sc->sc_carpdev == NULL)
862                         continue;
863                 CARP_SCLOCK(sc);
864                 if ((SC2IFP(sc)->if_flags & IFF_UP) &&
865                     (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING) &&
866                      sc->sc_state == MASTER)
867                         carp_send_ad_locked(sc);
868                 CARP_SCUNLOCK(sc);
869         }
870         mtx_unlock(&carp_mtx);
871 }
872 
/*
 * Unlocked entry point for sending a single advertisement.  The argument is
 * the softc passed as an opaque pointer (this function is what gets armed on
 * the sc_ad_tmo callout); it takes the softc lock around
 * carp_send_ad_locked().
 */
static void
carp_send_ad(void *v)
{
        struct carp_softc *softc = v;

        CARP_SCLOCK(softc);
        carp_send_ad_locked(softc);
        CARP_SCUNLOCK(softc);
}
882 
883 static void
884 carp_send_ad_locked(struct carp_softc *sc)
885 {
886         struct carp_header ch;
887         struct timeval tv;
888         struct carp_header *ch_ptr;
889         struct mbuf *m;
890         int len, advbase, advskew;
891 
892         CARP_SCLOCK_ASSERT(sc);
893 
894         /* bow out if we've lost our UPness or RUNNINGuiness */
895         if (!((SC2IFP(sc)->if_flags & IFF_UP) &&
896             (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) {
897                 advbase = 255;
898                 advskew = 255;
899         } else {
900                 advbase = sc->sc_advbase;
901                 if (!carp_suppress_preempt || sc->sc_advskew > 240)
902                         advskew = sc->sc_advskew;
903                 else
904                         advskew = 240;
905                 tv.tv_sec = advbase;
906                 tv.tv_usec = advskew * 1000000 / 256;
907         }
908 
909         ch.carp_version = CARP_VERSION;
910         ch.carp_type = CARP_ADVERTISEMENT;
911         ch.carp_vhid = sc->sc_vhid;
912         ch.carp_advbase = advbase;
913         ch.carp_advskew = advskew;
914         ch.carp_authlen = 7;    /* XXX DEFINE */
915         ch.carp_pad1 = 0;       /* must be zero */
916         ch.carp_cksum = 0;
917 
918 #ifdef INET
919         INIT_VNET_INET(curvnet);
920         if (sc->sc_ia) {
921                 struct ip *ip;
922 
923                 MGETHDR(m, M_DONTWAIT, MT_HEADER);
924                 if (m == NULL) {
925                         SC2IFP(sc)->if_oerrors++;
926                         carpstats.carps_onomem++;
927                         /* XXX maybe less ? */
928                         if (advbase != 255 || advskew != 255)
929                                 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
930                                     carp_send_ad, sc);
931                         return;
932                 }
933                 len = sizeof(*ip) + sizeof(ch);
934                 m->m_pkthdr.len = len;
935                 m->m_pkthdr.rcvif = NULL;
936                 m->m_len = len;
937                 MH_ALIGN(m, m->m_len);
938                 m->m_flags |= M_MCAST;
939                 ip = mtod(m, struct ip *);
940                 ip->ip_v = IPVERSION;
941                 ip->ip_hl = sizeof(*ip) >> 2;
942                 ip->ip_tos = IPTOS_LOWDELAY;
943                 ip->ip_len = len;
944                 ip->ip_id = ip_newid();
945                 ip->ip_off = IP_DF;
946                 ip->ip_ttl = CARP_DFLTTL;
947                 ip->ip_p = IPPROTO_CARP;
948                 ip->ip_sum = 0;
949                 ip->ip_src.s_addr = sc->sc_ia->ia_addr.sin_addr.s_addr;
950                 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
951 
952                 ch_ptr = (struct carp_header *)(&ip[1]);
953                 bcopy(&ch, ch_ptr, sizeof(ch));
954                 if (carp_prepare_ad(m, sc, ch_ptr))
955                         return;
956 
957                 m->m_data += sizeof(*ip);
958                 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
959                 m->m_data -= sizeof(*ip);
960 
961                 getmicrotime(&SC2IFP(sc)->if_lastchange);
962                 SC2IFP(sc)->if_opackets++;
963