FreeBSD/Linux Kernel Cross Reference
sys/netinet/ip_carp.c
1 /*
2 * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
3 * Copyright (c) 2003 Ryan McBride. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
18 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
24 * THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD: src/sys/netinet/ip_carp.c,v 1.59 2008/12/02 21:37:28 bz Exp $");
29
30 #include "opt_carp.h"
31 #include "opt_bpf.h"
32 #include "opt_inet.h"
33 #include "opt_inet6.h"
34
35 #include <sys/types.h>
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/conf.h>
39 #include <sys/kernel.h>
40 #include <sys/limits.h>
41 #include <sys/malloc.h>
42 #include <sys/mbuf.h>
43 #include <sys/module.h>
44 #include <sys/time.h>
45 #include <sys/priv.h>
46 #include <sys/proc.h>
47 #include <sys/sysctl.h>
48 #include <sys/syslog.h>
49 #include <sys/signalvar.h>
50 #include <sys/filio.h>
51 #include <sys/sockio.h>
52
53 #include <sys/socket.h>
54 #include <sys/vnode.h>
55 #include <sys/vimage.h>
56
57 #include <machine/stdarg.h>
58
59 #include <net/bpf.h>
60 #include <net/ethernet.h>
61 #include <net/fddi.h>
62 #include <net/iso88025.h>
63 #include <net/if.h>
64 #include <net/if_clone.h>
65 #include <net/if_dl.h>
66 #include <net/if_types.h>
67 #include <net/route.h>
68
69 #ifdef INET
70 #include <netinet/in.h>
71 #include <netinet/in_var.h>
72 #include <netinet/in_systm.h>
73 #include <netinet/ip.h>
74 #include <netinet/ip_var.h>
75 #include <netinet/if_ether.h>
76 #include <machine/in_cksum.h>
77 #include <netinet/vinet.h>
78 #endif
79
80 #ifdef INET6
81 #include <netinet/icmp6.h>
82 #include <netinet/ip6.h>
83 #include <netinet6/ip6_var.h>
84 #include <netinet6/scope6_var.h>
85 #include <netinet6/nd6.h>
86 #include <netinet6/vinet6.h>
87 #endif
88
89 #include <crypto/sha1.h>
90 #include <netinet/ip_carp.h>
91
92 #define CARP_IFNAME "carp"
93 static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces");
94 SYSCTL_DECL(_net_inet_carp);
95
/*
 * Per-instance state of one carp(4) virtual host.  One of these is
 * allocated for every cloned carp interface (see carp_clone_create()).
 */
96 struct carp_softc {
97 struct ifnet *sc_ifp; /* this carp interface's own ifnet */
98 struct ifnet *sc_carpdev; /* Pointer to parent interface; NULL while detached */
99 struct in_ifaddr *sc_ia; /* primary iface address */
100 struct ip_moptions sc_imo; /* IPv4 multicast options */
101 #ifdef INET6
102 struct in6_ifaddr *sc_ia6; /* primary iface address v6 */
103 struct ip6_moptions sc_im6o; /* IPv6 multicast options */
104 #endif /* INET6 */
105 TAILQ_ENTRY(carp_softc) sc_list; /* linkage on the parent's carp_if vhif_vrs queue */
106
107 enum { INIT = 0, BACKUP, MASTER } sc_state; /* current protocol state */
108
109 int sc_flags_backup;
110 int sc_suppress; /* non-zero: this instance is counted in carp_suppress_preempt */
111
112 int sc_sendad_errors; /* at >= CARP_SENDAD_MAX_ERRORS it is counted in carp_suppress_preempt */
113 #define CARP_SENDAD_MAX_ERRORS 3
114 int sc_sendad_success;
115 #define CARP_SENDAD_MIN_SUCCESS 3
116
117 int sc_vhid; /* virtual host id matched against carp_vhid; -1 until configured */
118 int sc_advskew; /* advertisement skew, in 1/256 second units */
119 int sc_naddrs;
120 int sc_naddrs6;
121 int sc_advbase; /* seconds */
122 int sc_init_counter; /* non-zero: pick a random sc_counter for the next advertisement */
123 u_int64_t sc_counter; /* 64-bit counter carried in advertisements */
124
125 /* authentication */
126 #define CARP_HMAC_PAD 64
127 unsigned char sc_key[CARP_KEY_LEN];
128 unsigned char sc_pad[CARP_HMAC_PAD]; /* HMAC outer pad, left here by carp_hmac_prepare() */
129 SHA1_CTX sc_sha1; /* precomputed start of the inner hash */
130
131 struct callout sc_ad_tmo; /* advertisement timeout */
132 struct callout sc_md_tmo; /* master down timeout */
133 struct callout sc_md6_tmo; /* master down timeout (presumably the IPv6 one -- confirm) */
134
135 LIST_ENTRY(carp_softc) sc_next; /* linkage on the global carpif_list */
136 };
/* Map a softc to the ifnet of its carp interface. */
137 #define SC2IFP(sc) ((sc)->sc_ifp)
138
/*
 * Global tunables and statistics, exported under the net.inet.carp
 * sysctl node.
 *
 * carp_suppress_preempt is a counter maintained by the driver itself and
 * is therefore exported read-only.  carp_opts[] is indexed by the
 * CARPCTL_* identifiers from netinet/ip_carp.h; its initial values are
 * given positionally below.
 */
139 int carp_suppress_preempt = 0;
140 int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */
141 SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
142 &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
143 SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
144 &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
145 SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
146 &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
147 SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
148 &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");
149 SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
150 &carp_suppress_preempt, 0, "Preemption is suppressed");
151
/* Packet and error counters, bumped on the input and output paths. */
152 struct carpstats carpstats;
153 SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
154 &carpstats, carpstats,
155 "CARP statistics (struct carpstats, netinet/ip_carp.h)");
156
/*
 * Per-parent-interface CARP state, reached through the parent ifnet's
 * if_carp pointer.  It holds the queue of carp instances attached to
 * that interface and the mutex used by the CARP_LOCK*() macros.
 */
157 struct carp_if {
158 TAILQ_HEAD(, carp_softc) vhif_vrs; /* attached instances, linked via sc_list */
159 int vhif_nvrs; /* number of entries on vhif_vrs */
160
161 struct ifnet *vhif_ifp;
162 struct mtx vhif_mtx; /* taken via CARP_LOCK()/CARP_SCLOCK() */
163 };
164
165 /* Get carp_if from softc. Valid after carp_set_addr{,6}; dereferences sc_carpdev, which must not be NULL. */
166 #define SC2CIF(sc) ((struct carp_if *)(sc)->sc_carpdev->if_carp)
167
168 /* lock per carp_if queue; these variants take the carp_if directly */
169 #define CARP_LOCK_INIT(cif) mtx_init(&(cif)->vhif_mtx, "carp_if", \
170 NULL, MTX_DEF)
171 #define CARP_LOCK_DESTROY(cif) mtx_destroy(&(cif)->vhif_mtx)
172 #define CARP_LOCK_ASSERT(cif) mtx_assert(&(cif)->vhif_mtx, MA_OWNED)
173 #define CARP_LOCK(cif) mtx_lock(&(cif)->vhif_mtx)
174 #define CARP_UNLOCK(cif) mtx_unlock(&(cif)->vhif_mtx)
175
/*
 * Same mutex, reached from a softc through SC2CIF(); callers in this file
 * test sc->sc_carpdev first wherever it may be NULL.
 */
176 #define CARP_SCLOCK(sc) mtx_lock(&SC2CIF(sc)->vhif_mtx)
177 #define CARP_SCUNLOCK(sc) mtx_unlock(&SC2CIF(sc)->vhif_mtx)
178 #define CARP_SCLOCK_ASSERT(sc) mtx_assert(&SC2CIF(sc)->vhif_mtx, MA_OWNED)
179
/* Log at LOG_INFO when the net.inet.carp.log sysctl value is greater than 0. */
180 #define CARP_LOG(...) do { \
181 if (carp_opts[CARPCTL_LOG] > 0) \
182 log(LOG_INFO, __VA_ARGS__); \
183 } while (0)
184
/* Log at LOG_DEBUG when the net.inet.carp.log sysctl value is greater than 1. */
185 #define CARP_DEBUG(...) do { \
186 if (carp_opts[CARPCTL_LOG] > 1) \
187 log(LOG_DEBUG, __VA_ARGS__); \
188 } while (0)
189
/* Forward declarations of the file-local functions. */
190 static void carp_hmac_prepare(struct carp_softc *);
191 static void carp_hmac_generate(struct carp_softc *, u_int32_t *,
192 unsigned char *);
193 static int carp_hmac_verify(struct carp_softc *, u_int32_t *,
194 unsigned char *);
195 static void carp_setroute(struct carp_softc *, int);
196 static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t);
197 static int carp_clone_create(struct if_clone *, int, caddr_t);
198 static void carp_clone_destroy(struct ifnet *);
199 static void carpdetach(struct carp_softc *, int);
200 static int carp_prepare_ad(struct mbuf *, struct carp_softc *,
201 struct carp_header *);
202 static void carp_send_ad_all(void);
203 static void carp_send_ad(void *);
204 static void carp_send_ad_locked(struct carp_softc *);
205 static void carp_send_arp(struct carp_softc *);
206 static void carp_master_down(void *);
207 static void carp_master_down_locked(struct carp_softc *);
208 static int carp_ioctl(struct ifnet *, u_long, caddr_t);
209 static int carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *,
210 struct rtentry *);
211 static void carp_start(struct ifnet *);
212 static void carp_setrun(struct carp_softc *, sa_family_t);
213 static void carp_set_state(struct carp_softc *, int);
214 static int carp_addrcount(struct carp_if *, struct in_ifaddr *, int);
/* Selector passed as the last argument of carp_addrcount(). */
215 enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING };
216
217 static void carp_multicast_cleanup(struct carp_softc *);
218 static int carp_set_addr(struct carp_softc *, struct sockaddr_in *);
219 static int carp_del_addr(struct carp_softc *, struct sockaddr_in *);
220 static void carp_carpdev_state_locked(struct carp_if *);
221 static void carp_sc_state_locked(struct carp_softc *);
222 #ifdef INET6
223 static void carp_send_na(struct carp_softc *);
224 static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
225 static int carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *);
226 static void carp_multicast6_cleanup(struct carp_softc *);
227 #endif
228
/* All carp instances (linked via sc_next); list changes are made under carp_mtx. */
229 static LIST_HEAD(, carp_softc) carpif_list;
230 static struct mtx carp_mtx;
231 IFC_SIMPLE_DECLARE(carp, 0);
232
/* Registration handle; carp_ifdetach() has the matching handler signature. */
233 static eventhandler_tag if_detach_event_tag;
234
235 static __inline u_int16_t
236 carp_cksum(struct mbuf *m, int len)
237 {
238 return (in_cksum(m, len));
239 }
240
241 static void
242 carp_hmac_prepare(struct carp_softc *sc)
243 {
244 u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
245 u_int8_t vhid = sc->sc_vhid & 0xff;
246 struct ifaddr *ifa;
247 int i, found;
248 #ifdef INET
249 struct in_addr last, cur, in;
250 #endif
251 #ifdef INET6
252 struct in6_addr last6, cur6, in6;
253 #endif
254
255 if (sc->sc_carpdev)
256 CARP_SCLOCK(sc);
257
258 /* XXX: possible race here */
259
260 /* compute ipad from key */
261 bzero(sc->sc_pad, sizeof(sc->sc_pad));
262 bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
263 for (i = 0; i < sizeof(sc->sc_pad); i++)
264 sc->sc_pad[i] ^= 0x36;
265
266 /* precompute first part of inner hash */
267 SHA1Init(&sc->sc_sha1);
268 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
269 SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
270 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
271 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
272 #ifdef INET
273 cur.s_addr = 0;
274 do {
275 found = 0;
276 last = cur;
277 cur.s_addr = 0xffffffff;
278 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
279 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
280 if (ifa->ifa_addr->sa_family == AF_INET &&
281 ntohl(in.s_addr) > ntohl(last.s_addr) &&
282 ntohl(in.s_addr) < ntohl(cur.s_addr)) {
283 cur.s_addr = in.s_addr;
284 found++;
285 }
286 }
287 if (found)
288 SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur));
289 } while (found);
290 #endif /* INET */
291 #ifdef INET6
292 memset(&cur6, 0, sizeof(cur6));
293 do {
294 found = 0;
295 last6 = cur6;
296 memset(&cur6, 0xff, sizeof(cur6));
297 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
298 in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
299 if (IN6_IS_SCOPE_EMBED(&in6))
300 in6.s6_addr16[1] = 0;
301 if (ifa->ifa_addr->sa_family == AF_INET6 &&
302 memcmp(&in6, &last6, sizeof(in6)) > 0 &&
303 memcmp(&in6, &cur6, sizeof(in6)) < 0) {
304 cur6 = in6;
305 found++;
306 }
307 }
308 if (found)
309 SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6));
310 } while (found);
311 #endif /* INET6 */
312
313 /* convert ipad to opad */
314 for (i = 0; i < sizeof(sc->sc_pad); i++)
315 sc->sc_pad[i] ^= 0x36 ^ 0x5c;
316
317 if (sc->sc_carpdev)
318 CARP_SCUNLOCK(sc);
319 }
320
321 static void
322 carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2],
323 unsigned char md[20])
324 {
325 SHA1_CTX sha1ctx;
326
327 /* fetch first half of inner hash */
328 bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
329
330 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
331 SHA1Final(md, &sha1ctx);
332
333 /* outer hash */
334 SHA1Init(&sha1ctx);
335 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
336 SHA1Update(&sha1ctx, md, 20);
337 SHA1Final(md, &sha1ctx);
338 }
339
340 static int
341 carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2],
342 unsigned char md[20])
343 {
344 unsigned char md2[20];
345
346 CARP_SCLOCK_ASSERT(sc);
347
348 carp_hmac_generate(sc, counter, md2);
349
350 return (bcmp(md, md2, sizeof(md2)));
351 }
352
353 static void
354 carp_setroute(struct carp_softc *sc, int cmd)
355 {
356 struct ifaddr *ifa;
357 int s;
358
359 if (sc->sc_carpdev)
360 CARP_SCLOCK_ASSERT(sc);
361
362 s = splnet();
363 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
364 if (ifa->ifa_addr->sa_family == AF_INET &&
365 sc->sc_carpdev != NULL) {
366 int count = carp_addrcount(
367 (struct carp_if *)sc->sc_carpdev->if_carp,
368 ifatoia(ifa), CARP_COUNT_MASTER);
369
370 if ((cmd == RTM_ADD && count == 1) ||
371 (cmd == RTM_DELETE && count == 0))
372 rtinit(ifa, cmd, RTF_UP | RTF_HOST);
373 }
374 #ifdef INET6
375 if (ifa->ifa_addr->sa_family == AF_INET6) {
376 if (cmd == RTM_ADD)
377 in6_ifaddloop(ifa);
378 else
379 in6_ifremloop(ifa);
380 }
381 #endif /* INET6 */
382 }
383 splx(s);
384 }
385
386 static int
387 carp_clone_create(struct if_clone *ifc, int unit, caddr_t params)
388 {
389
390 struct carp_softc *sc;
391 struct ifnet *ifp;
392
393 sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO);
394 ifp = SC2IFP(sc) = if_alloc(IFT_ETHER);
395 if (ifp == NULL) {
396 free(sc, M_CARP);
397 return (ENOSPC);
398 }
399
400 sc->sc_flags_backup = 0;
401 sc->sc_suppress = 0;
402 sc->sc_advbase = CARP_DFLTINTV;
403 sc->sc_vhid = -1; /* required setting */
404 sc->sc_advskew = 0;
405 sc->sc_init_counter = 1;
406 sc->sc_naddrs = sc->sc_naddrs6 = 0; /* M_ZERO? */
407 #ifdef INET6
408 sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
409 #endif
410 sc->sc_imo.imo_membership = (struct in_multi **)malloc(
411 (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP,
412 M_WAITOK);
413 sc->sc_imo.imo_mfilters = NULL;
414 sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
415 sc->sc_imo.imo_multicast_vif = -1;
416
417 callout_init(&sc->sc_ad_tmo, CALLOUT_MPSAFE);
418 callout_init(&sc->sc_md_tmo, CALLOUT_MPSAFE);
419 callout_init(&sc->sc_md6_tmo, CALLOUT_MPSAFE);
420
421 ifp->if_softc = sc;
422 if_initname(ifp, CARP_IFNAME, unit);
423 ifp->if_mtu = ETHERMTU;
424 ifp->if_flags = IFF_LOOPBACK;
425 ifp->if_ioctl = carp_ioctl;
426 ifp->if_output = carp_looutput;
427 ifp->if_start = carp_start;
428 ifp->if_type = IFT_CARP;
429 ifp->if_snd.ifq_maxlen = ifqmaxlen;
430 ifp->if_hdrlen = 0;
431 if_attach(ifp);
432 bpfattach(SC2IFP(sc), DLT_NULL, sizeof(u_int32_t));
433 mtx_lock(&carp_mtx);
434 LIST_INSERT_HEAD(&carpif_list, sc, sc_next);
435 mtx_unlock(&carp_mtx);
436 return (0);
437 }
438
439 static void
440 carp_clone_destroy(struct ifnet *ifp)
441 {
442 struct carp_softc *sc = ifp->if_softc;
443
444 if (sc->sc_carpdev)
445 CARP_SCLOCK(sc);
446 carpdetach(sc, 1); /* Returns unlocked. */
447
448 mtx_lock(&carp_mtx);
449 LIST_REMOVE(sc, sc_next);
450 mtx_unlock(&carp_mtx);
451 bpfdetach(ifp);
452 if_detach(ifp);
453 if_free_type(ifp, IFT_ETHER);
454 free(sc->sc_imo.imo_membership, M_CARP);
455 free(sc, M_CARP);
456 }
457
458 /*
459 * This function can be called on CARP interface destroy path,
460 * and in case of the removal of the underlying interface as
461 * well. We differentiate these two cases. In the latter case
462 * we do not cleanup our multicast memberships, since they
463 * are already freed. Also, in the latter case we do not
464 * release the lock on return, because the function will be
465 * called once more, for another CARP instance on the same
466 * interface.
467 */
468 static void
469 carpdetach(struct carp_softc *sc, int unlock)
470 {
471 struct carp_if *cif;
472
473 callout_stop(&sc->sc_ad_tmo);
474 callout_stop(&sc->sc_md_tmo);
475 callout_stop(&sc->sc_md6_tmo);
476
477 if (sc->sc_suppress)
478 carp_suppress_preempt--;
479 sc->sc_suppress = 0;
480
481 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
482 carp_suppress_preempt--;
483 sc->sc_sendad_errors = 0;
484
485 carp_set_state(sc, INIT);
486 SC2IFP(sc)->if_flags &= ~IFF_UP;
487 carp_setrun(sc, 0);
488 if (unlock)
489 carp_multicast_cleanup(sc);
490 #ifdef INET6
491 carp_multicast6_cleanup(sc);
492 #endif
493
494 if (sc->sc_carpdev != NULL) {
495 cif = (struct carp_if *)sc->sc_carpdev->if_carp;
496 CARP_LOCK_ASSERT(cif);
497 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
498 if (!--cif->vhif_nvrs) {
499 ifpromisc(sc->sc_carpdev, 0);
500 sc->sc_carpdev->if_carp = NULL;
501 CARP_LOCK_DESTROY(cif);
502 free(cif, M_IFADDR);
503 } else if (unlock)
504 CARP_UNLOCK(cif);
505 sc->sc_carpdev = NULL;
506 }
507 }
508
509 /* Detach an interface from the carp. */
510 static void
511 carp_ifdetach(void *arg __unused, struct ifnet *ifp)
512 {
513 struct carp_if *cif = (struct carp_if *)ifp->if_carp;
514 struct carp_softc *sc, *nextsc;
515
516 if (cif == NULL)
517 return;
518
519 /*
520 * XXX: At the end of for() cycle the lock will be destroyed.
521 */
522 CARP_LOCK(cif);
523 for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) {
524 nextsc = TAILQ_NEXT(sc, sc_list);
525 carpdetach(sc, 0);
526 }
527 }
528
529 /*
530 * process input packet.
531 * we have rearranged checks order compared to the rfc,
532 * but it seems more efficient this way or not possible otherwise.
533 */
534 void
535 carp_input(struct mbuf *m, int hlen)
536 {
537 struct ip *ip = mtod(m, struct ip *);
538 struct carp_header *ch;
539 int iplen, len;
540
541 carpstats.carps_ipackets++;
542
543 if (!carp_opts[CARPCTL_ALLOW]) {
544 m_freem(m);
545 return;
546 }
547
548 /* check if received on a valid carp interface */
549 if (m->m_pkthdr.rcvif->if_carp == NULL) {
550 carpstats.carps_badif++;
551 CARP_LOG("carp_input: packet received on non-carp "
552 "interface: %s\n",
553 m->m_pkthdr.rcvif->if_xname);
554 m_freem(m);
555 return;
556 }
557
558 /* verify that the IP TTL is 255. */
559 if (ip->ip_ttl != CARP_DFLTTL) {
560 carpstats.carps_badttl++;
561 CARP_LOG("carp_input: received ttl %d != 255i on %s\n",
562 ip->ip_ttl,
563 m->m_pkthdr.rcvif->if_xname);
564 m_freem(m);
565 return;
566 }
567
568 iplen = ip->ip_hl << 2;
569
570 if (m->m_pkthdr.len < iplen + sizeof(*ch)) {
571 carpstats.carps_badlen++;
572 CARP_LOG("carp_input: received len %zd < "
573 "sizeof(struct carp_header)\n",
574 m->m_len - sizeof(struct ip));
575 m_freem(m);
576 return;
577 }
578
579 if (iplen + sizeof(*ch) < m->m_len) {
580 if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) {
581 carpstats.carps_hdrops++;
582 CARP_LOG("carp_input: pullup failed\n");
583 return;
584 }
585 ip = mtod(m, struct ip *);
586 }
587 ch = (struct carp_header *)((char *)ip + iplen);
588
589 /*
590 * verify that the received packet length is
591 * equal to the CARP header
592 */
593 len = iplen + sizeof(*ch);
594 if (len > m->m_pkthdr.len) {
595 carpstats.carps_badlen++;
596 CARP_LOG("carp_input: packet too short %d on %s\n",
597 m->m_pkthdr.len,
598 m->m_pkthdr.rcvif->if_xname);
599 m_freem(m);
600 return;
601 }
602
603 if ((m = m_pullup(m, len)) == NULL) {
604 carpstats.carps_hdrops++;
605 return;
606 }
607 ip = mtod(m, struct ip *);
608 ch = (struct carp_header *)((char *)ip + iplen);
609
610 /* verify the CARP checksum */
611 m->m_data += iplen;
612 if (carp_cksum(m, len - iplen)) {
613 carpstats.carps_badsum++;
614 CARP_LOG("carp_input: checksum failed on %s\n",
615 m->m_pkthdr.rcvif->if_xname);
616 m_freem(m);
617 return;
618 }
619 m->m_data -= iplen;
620
621 carp_input_c(m, ch, AF_INET);
622 }
623
#ifdef INET6
/*
 * Process an incoming IPv6 CARP packet.
 *
 * Mirrors carp_input(): the packet is dropped when CARP input is
 * disabled, when it arrived on an interface without CARP state, when the
 * hop limit is not CARP_DFLTTL, when a complete CARP header cannot be
 * obtained at offset *offp, or when the CARP checksum fails.  Otherwise
 * it is handed to carp_input_c().  Always returns IPPROTO_DONE.
 */
int
carp6_input(struct mbuf **mp, int *offp, int proto)
{
	struct mbuf *m = *mp;
	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
	struct ifnet *rcvif = m->m_pkthdr.rcvif;
	struct carp_header *ch;
	u_int len;

	carpstats.carps_ipackets6++;

	if (!carp_opts[CARPCTL_ALLOW])
		goto drop;

	/* check if received on a valid carp interface */
	if (rcvif->if_carp == NULL) {
		carpstats.carps_badif++;
		CARP_LOG("carp6_input: packet received on non-carp "
		    "interface: %s\n",
		    rcvif->if_xname);
		goto drop;
	}

	/* verify that the IP TTL is 255 */
	if (ip6->ip6_hlim != CARP_DFLTTL) {
		carpstats.carps_badttl++;
		CARP_LOG("carp6_input: received ttl %d != 255 on %s\n",
		    ip6->ip6_hlim,
		    rcvif->if_xname);
		goto drop;
	}

	/*
	 * verify that we have a complete carp packet; no m_freem() on
	 * failure here, the mbuf is not ours to free any more
	 */
	len = m->m_len;
	IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
	if (ch == NULL) {
		carpstats.carps_badlen++;
		CARP_LOG("carp6_input: packet size %u too small\n", len);
		return (IPPROTO_DONE);
	}

	/* verify the CARP checksum */
	m->m_data += *offp;
	if (carp_cksum(m, sizeof(*ch))) {
		carpstats.carps_badsum++;
		CARP_LOG("carp6_input: checksum failed, on %s\n",
		    rcvif->if_xname);
		goto drop;
	}
	m->m_data -= *offp;

	carp_input_c(m, ch, AF_INET6);
	return (IPPROTO_DONE);

drop:
	m_freem(m);
	return (IPPROTO_DONE);
}
#endif /* INET6 */
685
686 static void
687 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
688 {
689 struct ifnet *ifp = m->m_pkthdr.rcvif;
690 struct carp_softc *sc;
691 u_int64_t tmp_counter;
692 struct timeval sc_tv, ch_tv;
693
694 /* verify that the VHID is valid on the receiving interface */
695 CARP_LOCK(ifp->if_carp);
696 TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list)
697 if (sc->sc_vhid == ch->carp_vhid)
698 break;
699
700 if (!sc || !((SC2IFP(sc)->if_flags & IFF_UP) &&
701 (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) {
702 carpstats.carps_badvhid++;
703 CARP_UNLOCK(ifp->if_carp);
704 m_freem(m);
705 return;
706 }
707
708 getmicrotime(&SC2IFP(sc)->if_lastchange);
709 SC2IFP(sc)->if_ipackets++;
710 SC2IFP(sc)->if_ibytes += m->m_pkthdr.len;
711
712 if (bpf_peers_present(SC2IFP(sc)->if_bpf)) {
713 struct ip *ip = mtod(m, struct ip *);
714 uint32_t af1 = af;
715
716 /* BPF wants net byte order */
717 ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2));
718 ip->ip_off = htons(ip->ip_off);
719 bpf_mtap2(SC2IFP(sc)->if_bpf, &af1, sizeof(af1), m);
720 }
721
722 /* verify the CARP version. */
723 if (ch->carp_version != CARP_VERSION) {
724 carpstats.carps_badver++;
725 SC2IFP(sc)->if_ierrors++;
726 CARP_UNLOCK(ifp->if_carp);
727 CARP_LOG("%s; invalid version %d\n",
728 SC2IFP(sc)->if_xname,
729 ch->carp_version);
730 m_freem(m);
731 return;
732 }
733
734 /* verify the hash */
735 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
736 carpstats.carps_badauth++;
737 SC2IFP(sc)->if_ierrors++;
738 CARP_UNLOCK(ifp->if_carp);
739 CARP_LOG("%s: incorrect hash\n", SC2IFP(sc)->if_xname);
740 m_freem(m);
741 return;
742 }
743
744 tmp_counter = ntohl(ch->carp_counter[0]);
745 tmp_counter = tmp_counter<<32;
746 tmp_counter += ntohl(ch->carp_counter[1]);
747
748 /* XXX Replay protection goes here */
749
750 sc->sc_init_counter = 0;
751 sc->sc_counter = tmp_counter;
752
753 sc_tv.tv_sec = sc->sc_advbase;
754 if (carp_suppress_preempt && sc->sc_advskew < 240)
755 sc_tv.tv_usec = 240 * 1000000 / 256;
756 else
757 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
758 ch_tv.tv_sec = ch->carp_advbase;
759 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
760
761 switch (sc->sc_state) {
762 case INIT:
763 break;
764 case MASTER:
765 /*
766 * If we receive an advertisement from a master who's going to
767 * be more frequent than us, go into BACKUP state.
768 */
769 if (timevalcmp(&sc_tv, &ch_tv, >) ||
770 timevalcmp(&sc_tv, &ch_tv, ==)) {
771 callout_stop(&sc->sc_ad_tmo);
772 CARP_DEBUG("%s: MASTER -> BACKUP "
773 "(more frequent advertisement received)\n",
774 SC2IFP(sc)->if_xname);
775 carp_set_state(sc, BACKUP);
776 carp_setrun(sc, 0);
777 carp_setroute(sc, RTM_DELETE);
778 }
779 break;
780 case BACKUP:
781 /*
782 * If we're pre-empting masters who advertise slower than us,
783 * and this one claims to be slower, treat him as down.
784 */
785 if (carp_opts[CARPCTL_PREEMPT] &&
786 timevalcmp(&sc_tv, &ch_tv, <)) {
787 CARP_DEBUG("%s: BACKUP -> MASTER "
788 "(preempting a slower master)\n",
789 SC2IFP(sc)->if_xname);
790 carp_master_down_locked(sc);
791 break;
792 }
793
794 /*
795 * If the master is going to advertise at such a low frequency
796 * that he's guaranteed to time out, we'd might as well just
797 * treat him as timed out now.
798 */
799 sc_tv.tv_sec = sc->sc_advbase * 3;
800 if (timevalcmp(&sc_tv, &ch_tv, <)) {
801 CARP_DEBUG("%s: BACKUP -> MASTER "
802 "(master timed out)\n",
803 SC2IFP(sc)->if_xname);
804 carp_master_down_locked(sc);
805 break;
806 }
807
808 /*
809 * Otherwise, we reset the counter and wait for the next
810 * advertisement.
811 */
812 carp_setrun(sc, af);
813 break;
814 }
815
816 CARP_UNLOCK(ifp->if_carp);
817
818 m_freem(m);
819 return;
820 }
821
822 static int
823 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
824 {
825 struct m_tag *mtag;
826 struct ifnet *ifp = SC2IFP(sc);
827
828 if (sc->sc_init_counter) {
829 /* this could also be seconds since unix epoch */
830 sc->sc_counter = arc4random();
831 sc->sc_counter = sc->sc_counter << 32;
832 sc->sc_counter += arc4random();
833 } else
834 sc->sc_counter++;
835
836 ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
837 ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
838
839 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
840
841 /* Tag packet for carp_output */
842 mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), M_NOWAIT);
843 if (mtag == NULL) {
844 m_freem(m);
845 SC2IFP(sc)->if_oerrors++;
846 return (ENOMEM);
847 }
848 bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *));
849 m_tag_prepend(m, mtag);
850
851 return (0);
852 }
853
854 static void
855 carp_send_ad_all(void)
856 {
857 struct carp_softc *sc;
858
859 mtx_lock(&carp_mtx);
860 LIST_FOREACH(sc, &carpif_list, sc_next) {
861 if (sc->sc_carpdev == NULL)
862 continue;
863 CARP_SCLOCK(sc);
864 if ((SC2IFP(sc)->if_flags & IFF_UP) &&
865 (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING) &&
866 sc->sc_state == MASTER)
867 carp_send_ad_locked(sc);
868 CARP_SCUNLOCK(sc);
869 }
870 mtx_unlock(&carp_mtx);
871 }
872
/*
 * Callout entry point for the advertisement timer: `v' is the softc.
 * Takes the carp_if lock around carp_send_ad_locked().
 */
static void
carp_send_ad(void *v)
{
	struct carp_softc *sc;

	sc = (struct carp_softc *)v;

	CARP_SCLOCK(sc);
	carp_send_ad_locked(sc);
	CARP_SCUNLOCK(sc);
}
882
883 static void
884 carp_send_ad_locked(struct carp_softc *sc)
885 {
886 struct carp_header ch;
887 struct timeval tv;
888 struct carp_header *ch_ptr;
889 struct mbuf *m;
890 int len, advbase, advskew;
891
892 CARP_SCLOCK_ASSERT(sc);
893
894 /* bow out if we've lost our UPness or RUNNINGuiness */
895 if (!((SC2IFP(sc)->if_flags & IFF_UP) &&
896 (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) {
897 advbase = 255;
898 advskew = 255;
899 } else {
900 advbase = sc->sc_advbase;
901 if (!carp_suppress_preempt || sc->sc_advskew > 240)
902 advskew = sc->sc_advskew;
903 else
904 advskew = 240;
905 tv.tv_sec = advbase;
906 tv.tv_usec = advskew * 1000000 / 256;
907 }
908
909 ch.carp_version = CARP_VERSION;
910 ch.carp_type = CARP_ADVERTISEMENT;
911 ch.carp_vhid = sc->sc_vhid;
912 ch.carp_advbase = advbase;
913 ch.carp_advskew = advskew;
914 ch.carp_authlen = 7; /* XXX DEFINE */
915 ch.carp_pad1 = 0; /* must be zero */
916 ch.carp_cksum = 0;
917
918 #ifdef INET
919 INIT_VNET_INET(curvnet);
920 if (sc->sc_ia) {
921 struct ip *ip;
922
923 MGETHDR(m, M_DONTWAIT, MT_HEADER);
924 if (m == NULL) {
925 SC2IFP(sc)->if_oerrors++;
926 carpstats.carps_onomem++;
927 /* XXX maybe less ? */
928 if (advbase != 255 || advskew != 255)
929 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
930 carp_send_ad, sc);
931 return;
932 }
933 len = sizeof(*ip) + sizeof(ch);
934 m->m_pkthdr.len = len;
935 m->m_pkthdr.rcvif = NULL;
936 m->m_len = len;
937 MH_ALIGN(m, m->m_len);
938 m->m_flags |= M_MCAST;
939 ip = mtod(m, struct ip *);
940 ip->ip_v = IPVERSION;
941 ip->ip_hl = sizeof(*ip) >> 2;
942 ip->ip_tos = IPTOS_LOWDELAY;
943 ip->ip_len = len;
944 ip->ip_id = ip_newid();
945 ip->ip_off = IP_DF;
946 ip->ip_ttl = CARP_DFLTTL;
947 ip->ip_p = IPPROTO_CARP;
948 ip->ip_sum = 0;
949 ip->ip_src.s_addr = sc->sc_ia->ia_addr.sin_addr.s_addr;
950 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
951
952 ch_ptr = (struct carp_header *)(&ip[1]);
953 bcopy(&ch, ch_ptr, sizeof(ch));
954 if (carp_prepare_ad(m, sc, ch_ptr))
955 return;
956
957 m->m_data += sizeof(*ip);
958 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
959 m->m_data -= sizeof(*ip);
960
961 getmicrotime(&SC2IFP(sc)->if_lastchange);
962 SC2IFP(sc)->if_opackets++;
963 |