FreeBSD/Linux Kernel Cross Reference
sys/net/if_lagg.c
1 /* $OpenBSD: if_trunk.c,v 1.30 2007/01/31 06:20:19 reyk Exp $ */
2
3 /*
4 * Copyright (c) 2005, 2006 Reyk Floeter <reyk@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19 #include <sys/cdefs.h>
20 __FBSDID("$FreeBSD: releng/6.3/sys/net/if_lagg.c 174831 2007-12-21 05:35:29Z thompsa $");
21
22 #include "opt_inet.h"
23 #include "opt_inet6.h"
24
25 #include <sys/param.h>
26 #include <sys/kernel.h>
27 #include <sys/malloc.h>
28 #include <sys/mbuf.h>
29 #include <sys/queue.h>
30 #include <sys/socket.h>
31 #include <sys/sockio.h>
32 #include <sys/sysctl.h>
33 #include <sys/module.h>
34 #include <sys/systm.h>
35 #include <sys/proc.h>
36 #include <sys/lock.h>
37 #include <sys/rwlock.h>
38 #include <sys/taskqueue.h>
39
40 #include <net/ethernet.h>
41 #include <net/if.h>
42 #include <net/if_clone.h>
43 #include <net/if_arp.h>
44 #include <net/if_dl.h>
45 #include <net/if_llc.h>
46 #include <net/if_media.h>
47 #include <net/if_types.h>
48 #include <net/if_var.h>
49 #include <net/bpf.h>
50
51 #ifdef INET
52 #include <netinet/in.h>
53 #include <netinet/in_systm.h>
54 #include <netinet/if_ether.h>
55 #include <netinet/ip.h>
56 #endif
57
58 #ifdef INET6
59 #include <netinet/ip6.h>
60 #endif
61
62 #include <net/if_vlan_var.h>
63 #include <net/if_lagg.h>
64 #include <net/ieee8023ad_lacp.h>
65
66 /* Special flags we should propagate to the lagg ports. */
67 static struct {
68 int flag;
69 int (*func)(struct ifnet *, int);
70 } lagg_pflags[] = {
71 {IFF_PROMISC, ifpromisc},
72 {IFF_ALLMULTI, if_allmulti},
73 {0, NULL}
74 };
75
76 SLIST_HEAD(__trhead, lagg_softc) lagg_list; /* list of laggs */
77 static struct mtx lagg_list_mtx;
78 eventhandler_tag lagg_detach_cookie = NULL;
79
80 static int lagg_clone_create(struct if_clone *, int);
81 static void lagg_clone_destroy(struct ifnet *);
82 static void lagg_lladdr(struct lagg_softc *, uint8_t *);
83 static void lagg_capabilities(struct lagg_softc *);
84 static void lagg_port_lladdr(struct lagg_port *, uint8_t *);
85 static void lagg_port_setlladdr(void *, int);
86 static int lagg_port_create(struct lagg_softc *, struct ifnet *);
87 static int lagg_port_destroy(struct lagg_port *, int);
88 static struct mbuf *lagg_input(struct ifnet *, struct mbuf *);
89 static void lagg_linkstate(struct lagg_softc *);
90 static void lagg_port_state(struct ifnet *, int);
91 static int lagg_port_ioctl(struct ifnet *, u_long, caddr_t);
92 static int lagg_port_output(struct ifnet *, struct mbuf *,
93 struct sockaddr *, struct rtentry *);
94 static void lagg_port_ifdetach(void *arg __unused, struct ifnet *);
95 static int lagg_port_checkstacking(struct lagg_softc *);
96 static void lagg_port2req(struct lagg_port *, struct lagg_reqport *);
97 static void lagg_init(void *);
98 static void lagg_stop(struct lagg_softc *);
99 static int lagg_ioctl(struct ifnet *, u_long, caddr_t);
100 static int lagg_ether_setmulti(struct lagg_softc *);
101 static int lagg_ether_cmdmulti(struct lagg_port *, int);
102 static void lagg_ether_purgemulti(struct lagg_softc *);
103 static int lagg_setflag(struct lagg_port *, int, int,
104 int (*func)(struct ifnet *, int));
105 static int lagg_setflags(struct lagg_port *, int status);
106 static void lagg_start(struct ifnet *);
107 static int lagg_media_change(struct ifnet *);
108 static void lagg_media_status(struct ifnet *, struct ifmediareq *);
109 static struct lagg_port *lagg_link_active(struct lagg_softc *,
110 struct lagg_port *);
111 static const void *lagg_gethdr(struct mbuf *, u_int, u_int, void *);
112
113 IFC_SIMPLE_DECLARE(lagg, 0);
114
115 /* Simple round robin */
116 static int lagg_rr_attach(struct lagg_softc *);
117 static int lagg_rr_detach(struct lagg_softc *);
118 static int lagg_rr_start(struct lagg_softc *, struct mbuf *);
119 static struct mbuf *lagg_rr_input(struct lagg_softc *, struct lagg_port *,
120 struct mbuf *);
121
122 /* Active failover */
123 static int lagg_fail_attach(struct lagg_softc *);
124 static int lagg_fail_detach(struct lagg_softc *);
125 static int lagg_fail_start(struct lagg_softc *, struct mbuf *);
126 static struct mbuf *lagg_fail_input(struct lagg_softc *, struct lagg_port *,
127 struct mbuf *);
128
129 /* Loadbalancing */
130 static int lagg_lb_attach(struct lagg_softc *);
131 static int lagg_lb_detach(struct lagg_softc *);
132 static int lagg_lb_port_create(struct lagg_port *);
133 static void lagg_lb_port_destroy(struct lagg_port *);
134 static int lagg_lb_start(struct lagg_softc *, struct mbuf *);
135 static struct mbuf *lagg_lb_input(struct lagg_softc *, struct lagg_port *,
136 struct mbuf *);
137 static int lagg_lb_porttable(struct lagg_softc *, struct lagg_port *);
138
139 /* 802.3ad LACP */
140 static int lagg_lacp_attach(struct lagg_softc *);
141 static int lagg_lacp_detach(struct lagg_softc *);
142 static int lagg_lacp_start(struct lagg_softc *, struct mbuf *);
143 static struct mbuf *lagg_lacp_input(struct lagg_softc *, struct lagg_port *,
144 struct mbuf *);
145 static void lagg_lacp_lladdr(struct lagg_softc *);
146
147 /* lagg protocol table */
148 static const struct {
149 int ti_proto;
150 int (*ti_attach)(struct lagg_softc *);
151 } lagg_protos[] = {
152 { LAGG_PROTO_ROUNDROBIN, lagg_rr_attach },
153 { LAGG_PROTO_FAILOVER, lagg_fail_attach },
154 { LAGG_PROTO_LOADBALANCE, lagg_lb_attach },
155 { LAGG_PROTO_ETHERCHANNEL, lagg_lb_attach },
156 { LAGG_PROTO_LACP, lagg_lacp_attach },
157 { LAGG_PROTO_NONE, NULL }
158 };
159
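/*
 * The protocol table above is consulted by the SIOCSLAGG handler in
 * lagg_ioctl(): the requested ra_proto is matched against ti_proto and the
 * corresponding ti_attach routine is called.  From userland this is driven
 * by ifconfig(8), e.g. (typical usage):
 *
 *	ifconfig lagg0 create
 *	ifconfig lagg0 laggproto lacp laggport em0 laggport em1
 */
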
160 /*
161 * Return a 32-bit hash of the given buffer.
162 * XXX Taken from sys/sys/hash.h. This file cannot be included directly due to
163 * compiler warnings that need an API change to fix.
164 */
165 #define HASHSTEP(x,c) (((x << 5) + x) + (c))
166 static __inline uint32_t
167 hash32_buf(const void *buf, size_t len, uint32_t hash)
168 {
169 const unsigned char *p = buf;
170
171 while (len--)
172 hash = HASHSTEP(hash, *p++);
173
174 return hash;
175 }
176 #undef HASHSTEP
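
/*
 * Illustration: HASHSTEP() above is the Bernstein hash step, i.e.
 * ((x << 5) + x) + c == 33 * x + c, so hash32_buf() folds each byte as
 * hash = hash * 33 + byte, starting from the caller-supplied seed.  For
 * example, hashing the single byte 0x01 with seed 0 yields 1, and hashing
 * {0x01, 0x02} yields 33 * 1 + 2 = 35.
 */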
177
178 static int
179 lagg_modevent(module_t mod, int type, void *data)
180 {
181
182 switch (type) {
183 case MOD_LOAD:
184 mtx_init(&lagg_list_mtx, "if_lagg list", NULL, MTX_DEF);
185 SLIST_INIT(&lagg_list);
186 if_clone_attach(&lagg_cloner);
187 lagg_input_p = lagg_input;
188 lagg_linkstate_p = lagg_port_state;
189 lagg_detach_cookie = EVENTHANDLER_REGISTER(
190 ifnet_departure_event, lagg_port_ifdetach, NULL,
191 EVENTHANDLER_PRI_ANY);
192 break;
193 case MOD_UNLOAD:
194 EVENTHANDLER_DEREGISTER(ifnet_departure_event,
195 lagg_detach_cookie);
196 if_clone_detach(&lagg_cloner);
197 while (!SLIST_EMPTY(&lagg_list))
198 lagg_clone_destroy(SLIST_FIRST(&lagg_list)->sc_ifp);
199 lagg_input_p = NULL;
200 lagg_linkstate_p = NULL;
201 mtx_destroy(&lagg_list_mtx);
202 break;
203 default:
204 return (EOPNOTSUPP);
205 }
206 return (0);
207 }
208
209 static moduledata_t lagg_mod = {
210 "if_lagg",
211 lagg_modevent,
212 0
213 };
214
215 DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
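
/*
 * The driver is typically loaded at runtime with "kldload if_lagg" (or
 * compiled in with "device lagg").  MOD_LOAD above registers the interface
 * cloner and the lagg input/linkstate hooks; MOD_UNLOAD reverses this after
 * destroying any remaining lagg interfaces.
 */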
216
217 static int
218 lagg_clone_create(struct if_clone *ifc, int unit)
219 {
220 struct lagg_softc *sc;
221 struct ifnet *ifp;
222 int i, error = 0;
223 static const u_char eaddr[6]; /* 00:00:00:00:00:00 */
224
225 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
226 ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
227 if (ifp == NULL) {
228 free(sc, M_DEVBUF);
229 return (ENOSPC);
230 }
231
232 sc->sc_proto = LAGG_PROTO_NONE;
233 for (i = 0; lagg_protos[i].ti_proto != LAGG_PROTO_NONE; i++) {
234 if (lagg_protos[i].ti_proto == LAGG_PROTO_DEFAULT) {
235 sc->sc_proto = lagg_protos[i].ti_proto;
236 if ((error = lagg_protos[i].ti_attach(sc)) != 0) {
237 if_free_type(ifp, IFT_ETHER);
238 free(sc, M_DEVBUF);
239 return (error);
240 }
241 break;
242 }
243 }
244 LAGG_LOCK_INIT(sc);
245 SLIST_INIT(&sc->sc_ports);
246 TASK_INIT(&sc->sc_lladdr_task, 0, lagg_port_setlladdr, sc);
247
248 /* Initialise pseudo media types */
249 ifmedia_init(&sc->sc_media, 0, lagg_media_change,
250 lagg_media_status);
251 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
252 ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
253
254 if_initname(ifp, ifc->ifc_name, unit);
255 ifp->if_type = IFT_ETHER;
256 ifp->if_softc = sc;
257 ifp->if_start = lagg_start;
258 ifp->if_init = lagg_init;
259 ifp->if_ioctl = lagg_ioctl;
260 ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
261
262 IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
263 ifp->if_snd.ifq_drv_maxlen = ifqmaxlen;
264 IFQ_SET_READY(&ifp->if_snd);
265
266 /*
267 * Attach as an ordinary ethernet device; child ports will be attached
268 * as special devices of type IFT_IEEE8023ADLAG.
269 */
270 ether_ifattach(ifp, eaddr);
271
272 /* Insert into the global list of laggs */
273 mtx_lock(&lagg_list_mtx);
274 SLIST_INSERT_HEAD(&lagg_list, sc, sc_entries);
275 mtx_unlock(&lagg_list_mtx);
276
277 return (0);
278 }
279
280 static void
281 lagg_clone_destroy(struct ifnet *ifp)
282 {
283 struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
284 struct lagg_port *lp;
285
286 LAGG_WLOCK(sc);
287
288 lagg_stop(sc);
289 ifp->if_flags &= ~IFF_UP;
290
291 /* Shutdown and remove lagg ports */
292 while ((lp = SLIST_FIRST(&sc->sc_ports)) != NULL)
293 lagg_port_destroy(lp, 1);
294 /* Unhook the aggregation protocol */
295 (*sc->sc_detach)(sc);
296
297 /* Remove any multicast groups that we may have joined. */
298 lagg_ether_purgemulti(sc);
299
300 LAGG_WUNLOCK(sc);
301
302 ifmedia_removeall(&sc->sc_media);
303 ether_ifdetach(ifp);
304 if_free_type(ifp, IFT_ETHER);
305
306 mtx_lock(&lagg_list_mtx);
307 SLIST_REMOVE(&lagg_list, sc, lagg_softc, sc_entries);
308 mtx_unlock(&lagg_list_mtx);
309
310 taskqueue_drain(taskqueue_swi, &sc->sc_lladdr_task);
311 LAGG_LOCK_DESTROY(sc);
312 free(sc, M_DEVBUF);
313 }
314
315 static void
316 lagg_lladdr(struct lagg_softc *sc, uint8_t *lladdr)
317 {
318 struct ifnet *ifp = sc->sc_ifp;
319
320 if (memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)
321 return;
322
323 bcopy(lladdr, IFP2ENADDR(ifp), ETHER_ADDR_LEN);
324 bcopy(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN);
325 /* Let the protocol know the MAC has changed */
326 if (sc->sc_lladdr != NULL)
327 (*sc->sc_lladdr)(sc);
328 }
329
330 static void
331 lagg_capabilities(struct lagg_softc *sc)
332 {
333 struct lagg_port *lp;
334 int cap = ~0, ena = ~0;
335
336 LAGG_WLOCK_ASSERT(sc);
337
338 /* Get capabilities from the lagg ports */
339 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
340 cap &= lp->lp_ifp->if_capabilities;
341 ena &= lp->lp_ifp->if_capenable;
342 }
343 cap = (cap == ~0 ? 0 : cap);
344 ena = (ena == ~0 ? 0 : ena);
345
346 if (sc->sc_ifp->if_capabilities != cap ||
347 sc->sc_ifp->if_capenable != ena) {
348 sc->sc_ifp->if_capabilities = cap;
349 sc->sc_ifp->if_capenable = ena;
350 getmicrotime(&sc->sc_ifp->if_lastchange);
351
352 if (sc->sc_ifflags & IFF_DEBUG)
353 if_printf(sc->sc_ifp,
354 "capabilities 0x%08x enabled 0x%08x\n", cap, ena);
355 }
356 }
357
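/*
 * Request a link layer address change on a lagg port.  The new address is
 * only queued here; it is applied later from a taskqueue context by
 * lagg_port_setlladdr() to avoid a lock order reversal.
 */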
358 static void
359 lagg_port_lladdr(struct lagg_port *lp, uint8_t *lladdr)
360 {
361 struct lagg_softc *sc = lp->lp_softc;
362 struct ifnet *ifp = lp->lp_ifp;
363 struct lagg_llq *llq;
364 int pending = 0;
365
366 LAGG_WLOCK_ASSERT(sc);
367
368 if (lp->lp_detaching ||
369 memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)
370 return;
371
372 /* Check to make sure it's not already queued to be changed */
373 SLIST_FOREACH(llq, &sc->sc_llq_head, llq_entries) {
374 if (llq->llq_ifp == ifp) {
375 pending = 1;
376 break;
377 }
378 }
379
380 if (!pending) {
381 llq = malloc(sizeof(struct lagg_llq), M_DEVBUF, M_NOWAIT);
382 if (llq == NULL) /* XXX what to do */
383 return;
384 }
385
386 /* Update the lladdr even if pending, it may have changed */
387 llq->llq_ifp = ifp;
388 bcopy(lladdr, llq->llq_lladdr, ETHER_ADDR_LEN);
389
390 if (!pending)
391 SLIST_INSERT_HEAD(&sc->sc_llq_head, llq, llq_entries);
392
393 taskqueue_enqueue(taskqueue_swi, &sc->sc_lladdr_task);
394 }
395
396 /*
397 * Set the interface MAC address from a taskqueue to avoid a lock order reversal (LOR).
398 */
399 static void
400 lagg_port_setlladdr(void *arg, int pending)
401 {
402 struct lagg_softc *sc = (struct lagg_softc *)arg;
403 struct lagg_llq *llq, *head;
404 struct ifnet *ifp;
405 int error;
406
407 /* Grab a local reference of the queue and remove it from the softc */
408 LAGG_WLOCK(sc);
409 head = SLIST_FIRST(&sc->sc_llq_head);
410 SLIST_FIRST(&sc->sc_llq_head) = NULL;
411 LAGG_WUNLOCK(sc);
412
413 /*
414 * Traverse the queue and set the lladdr on each ifp. It is safe to do
415 * unlocked as we have the only reference to it.
416 */
417 for (llq = head; llq != NULL; llq = head) {
418 ifp = llq->llq_ifp;
419
420 /* Set the link layer address */
421 error = if_setlladdr(ifp, llq->llq_lladdr, ETHER_ADDR_LEN);
422 if (error)
423 printf("%s: setlladdr failed on %s\n", __func__,
424 ifp->if_xname);
425
426 head = SLIST_NEXT(llq, llq_entries);
427 free(llq, M_DEVBUF);
428 }
429 }
430
431 static int
432 lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
433 {
434 struct lagg_softc *sc_ptr;
435 struct lagg_port *lp;
436 int error = 0;
437
438 LAGG_WLOCK_ASSERT(sc);
439
440 /* Limit the maximum number of lagg ports */
441 if (sc->sc_count >= LAGG_MAX_PORTS)
442 return (ENOSPC);
443
444 /* New lagg port has to be in an idle state */
445 if (ifp->if_drv_flags & IFF_DRV_OACTIVE)
446 return (EBUSY);
447
448 /* Check if port has already been associated to a lagg */
449 if (ifp->if_lagg != NULL)
450 return (EBUSY);
451
452 /* XXX Disallow non-ethernet interfaces (this should be any of 802) */
453 if (ifp->if_type != IFT_ETHER)
454 return (EPROTONOSUPPORT);
455
456 /* Allow the first Ethernet member to define the MTU */
457 if (SLIST_EMPTY(&sc->sc_ports))
458 sc->sc_ifp->if_mtu = ifp->if_mtu;
459 else if (sc->sc_ifp->if_mtu != ifp->if_mtu) {
460 if_printf(sc->sc_ifp, "invalid MTU for %s\n",
461 ifp->if_xname);
462 return (EINVAL);
463 }
464
465 if ((lp = malloc(sizeof(struct lagg_port),
466 M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
467 return (ENOMEM);
468
469 /* Check if port is a stacked lagg */
470 mtx_lock(&lagg_list_mtx);
471 SLIST_FOREACH(sc_ptr, &lagg_list, sc_entries) {
472 if (ifp == sc_ptr->sc_ifp) {
473 mtx_unlock(&lagg_list_mtx);
474 free(lp, M_DEVBUF);
475 return (EINVAL);
476 /* XXX disable stacking for the moment, it's untested
477 lp->lp_flags |= LAGG_PORT_STACK;
478 if (lagg_port_checkstacking(sc_ptr) >=
479 LAGG_MAX_STACKING) {
480 mtx_unlock(&lagg_list_mtx);
481 free(lp, M_DEVBUF);
482 return (E2BIG);
483 }
484 */
485 }
486 }
487 mtx_unlock(&lagg_list_mtx);
488
489 /* Change the interface type */
490 lp->lp_iftype = ifp->if_type;
491 ifp->if_type = IFT_IEEE8023ADLAG;
492 ifp->if_lagg = lp;
493 lp->lp_ioctl = ifp->if_ioctl;
494 ifp->if_ioctl = lagg_port_ioctl;
495 lp->lp_output = ifp->if_output;
496 ifp->if_output = lagg_port_output;
497
498 lp->lp_ifp = ifp;
499 lp->lp_softc = sc;
500
501 /* Save port link layer address */
502 bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ETHER_ADDR_LEN);
503
504 if (SLIST_EMPTY(&sc->sc_ports)) {
505 sc->sc_primary = lp;
506 lagg_lladdr(sc, IF_LLADDR(ifp));
507 } else {
508 /* Update link layer address for this port */
509 lagg_port_lladdr(lp, IF_LLADDR(sc->sc_ifp));
510 }
511
512 /* Insert into the list of ports */
513 SLIST_INSERT_HEAD(&sc->sc_ports, lp, lp_entries);
514 sc->sc_count++;
515
516 /* Update lagg capabilities */
517 lagg_capabilities(sc);
518 lagg_linkstate(sc);
519
520 /* Add multicast addresses and interface flags to this port */
521 lagg_ether_cmdmulti(lp, 1);
522 lagg_setflags(lp, 1);
523
524 if (sc->sc_port_create != NULL)
525 error = (*sc->sc_port_create)(lp);
526 if (error) {
527 /* remove the port again, without calling sc_port_destroy */
528 lagg_port_destroy(lp, 0);
529 return (error);
530 }
531
532 return (error);
533 }
534
535 static int
536 lagg_port_checkstacking(struct lagg_softc *sc)
537 {
538 struct lagg_softc *sc_ptr;
539 struct lagg_port *lp;
540 int m = 0;
541
542 LAGG_WLOCK_ASSERT(sc);
543
544 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
545 if (lp->lp_flags & LAGG_PORT_STACK) {
546 sc_ptr = (struct lagg_softc *)lp->lp_ifp->if_softc;
547 m = MAX(m, lagg_port_checkstacking(sc_ptr));
548 }
549 }
550
551 return (m + 1);
552 }
553
554 static int
555 lagg_port_destroy(struct lagg_port *lp, int runpd)
556 {
557 struct lagg_softc *sc = lp->lp_softc;
558 struct lagg_port *lp_ptr;
559 struct lagg_llq *llq;
560 struct ifnet *ifp = lp->lp_ifp;
561
562 LAGG_WLOCK_ASSERT(sc);
563
564 if (runpd && sc->sc_port_destroy != NULL)
565 (*sc->sc_port_destroy)(lp);
566
567 /*
568 * Remove multicast addresses and interface flags from this port and
569 * reset the MAC address, skip if the interface is being detached.
570 */
571 if (!lp->lp_detaching) {
572 lagg_ether_cmdmulti(lp, 0);
573 lagg_setflags(lp, 0);
574 lagg_port_lladdr(lp, lp->lp_lladdr);
575 }
576
577 /* Restore interface */
578 ifp->if_type = lp->lp_iftype;
579 ifp->if_ioctl = lp->lp_ioctl;
580 ifp->if_output = lp->lp_output;
581 ifp->if_lagg = NULL;
582
583 /* Finally, remove the port from the lagg */
584 SLIST_REMOVE(&sc->sc_ports, lp, lagg_port, lp_entries);
585 sc->sc_count--;
586
587 /* Update the primary interface */
588 if (lp == sc->sc_primary) {
589 uint8_t lladdr[ETHER_ADDR_LEN];
590
591 if ((lp_ptr = SLIST_FIRST(&sc->sc_ports)) == NULL) {
592 bzero(&lladdr, ETHER_ADDR_LEN);
593 } else {
594 bcopy(lp_ptr->lp_lladdr,
595 lladdr, ETHER_ADDR_LEN);
596 }
597 lagg_lladdr(sc, lladdr);
598 sc->sc_primary = lp_ptr;
599
600 /* Update link layer address for each port */
601 SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries)
602 lagg_port_lladdr(lp_ptr, lladdr);
603 }
604
605 /* Remove any pending lladdr changes from the queue */
606 if (lp->lp_detaching) {
607 SLIST_FOREACH(llq, &sc->sc_llq_head, llq_entries) {
608 if (llq->llq_ifp == ifp) {
609 SLIST_REMOVE(&sc->sc_llq_head, llq, lagg_llq,
610 llq_entries);
611 free(llq, M_DEVBUF);
612 break; /* Only appears once */
613 }
614 }
615 }
616
617 if (lp->lp_ifflags)
618 if_printf(ifp, "%s: lp_ifflags unclean\n", __func__);
619
620 free(lp, M_DEVBUF);
621
622 /* Update lagg capabilities */
623 lagg_capabilities(sc);
624 lagg_linkstate(sc);
625
626 return (0);
627 }
628
629 static int
630 lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
631 {
632 struct lagg_reqport *rp = (struct lagg_reqport *)data;
633 struct lagg_softc *sc;
634 struct lagg_port *lp = NULL;
635 int error = 0;
636
637 /* Should be checked by the caller */
638 if (ifp->if_type != IFT_IEEE8023ADLAG ||
639 (lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL)
640 goto fallback;
641
642 switch (cmd) {
643 case SIOCGLAGGPORT:
644 if (rp->rp_portname[0] == '\0' ||
645 ifunit(rp->rp_portname) != ifp) {
646 error = EINVAL;
647 break;
648 }
649
650 LAGG_RLOCK(sc);
651 if ((lp = ifp->if_lagg) == NULL || lp->lp_softc != sc) {
652 error = ENOENT;
653 LAGG_RUNLOCK(sc);
654 break;
655 }
656
657 lagg_port2req(lp, rp);
658 LAGG_RUNLOCK(sc);
659 break;
660
661 case SIOCSIFCAP:
662 if (lp->lp_ioctl == NULL) {
663 error = EINVAL;
664 break;
665 }
666 error = (*lp->lp_ioctl)(ifp, cmd, data);
667 if (error)
668 break;
669
670 /* Update lagg interface capabilities */
671 LAGG_WLOCK(sc);
672 lagg_capabilities(sc);
673 LAGG_WUNLOCK(sc);
674 break;
675
676 case SIOCSIFMTU:
677 /* Do not allow the MTU to be changed once joined */
678 error = EINVAL;
679 break;
680
681 default:
682 goto fallback;
683 }
684
685 return (error);
686
687 fallback:
688 if (lp != NULL && lp->lp_ioctl != NULL)
689 return ((*lp->lp_ioctl)(ifp, cmd, data));
690
691 return (EINVAL);
692 }
693
694 static int
695 lagg_port_output(struct ifnet *ifp, struct mbuf *m,
696 struct sockaddr *dst, struct rtentry *rt0)
697 {
698 struct lagg_port *lp = ifp->if_lagg;
699 struct ether_header *eh;
700 short type = 0;
701
702 switch (dst->sa_family) {
703 case pseudo_AF_HDRCMPLT:
704 case AF_UNSPEC:
705 eh = (struct ether_header *)dst->sa_data;
706 type = eh->ether_type;
707 break;
708 }
709
710 /*
711 * Only allow Ethernet types required to initiate or maintain the link;
712 * aggregated frames take a different path.
713 */
714 switch (ntohs(type)) {
715 case ETHERTYPE_PAE: /* EAPOL PAE/802.1x */
716 return ((*lp->lp_output)(ifp, m, dst, rt0));
717 }
718
719 /* drop any other frames */
720 m_freem(m);
721 return (EBUSY);
722 }
723
724 static void
725 lagg_port_ifdetach(void *arg __unused, struct ifnet *ifp)
726 {
727 struct lagg_port *lp;
728 struct lagg_softc *sc;
729
730 if ((lp = ifp->if_lagg) == NULL)
731 return;
732
733 sc = lp->lp_softc;
734
735 LAGG_WLOCK(sc);
736 lp->lp_detaching = 1;
737 lagg_port_destroy(lp, 1);
738 LAGG_WUNLOCK(sc);
739 }
740
741 static void
742 lagg_port2req(struct lagg_port *lp, struct lagg_reqport *rp)
743 {
744 struct lagg_softc *sc = lp->lp_softc;
745
746 strlcpy(rp->rp_ifname, sc->sc_ifname, sizeof(rp->rp_ifname));
747 strlcpy(rp->rp_portname, lp->lp_ifp->if_xname, sizeof(rp->rp_portname));
748 rp->rp_prio = lp->lp_prio;
749 rp->rp_flags = lp->lp_flags;
750 if (sc->sc_portreq != NULL)
751 (*sc->sc_portreq)(lp, (caddr_t)&rp->rp_psc);
752
753 /* Add protocol specific flags */
754 switch (sc->sc_proto) {
755 case LAGG_PROTO_FAILOVER:
756 if (lp == sc->sc_primary)
757 rp->rp_flags |= LAGG_PORT_MASTER;
758 if (lp == lagg_link_active(sc, sc->sc_primary))
759 rp->rp_flags |= LAGG_PORT_ACTIVE;
760 break;
761
762 case LAGG_PROTO_ROUNDROBIN:
763 case LAGG_PROTO_LOADBALANCE:
764 case LAGG_PROTO_ETHERCHANNEL:
765 if (LAGG_PORTACTIVE(lp))
766 rp->rp_flags |= LAGG_PORT_ACTIVE;
767 break;
768
769 case LAGG_PROTO_LACP:
770 /* LACP has a different definition of active */
771 if (lacp_port_isactive(lp))
772 rp->rp_flags |= LAGG_PORT_ACTIVE;
773 break;
774 }
775
776 }
777
778 static void
779 lagg_init(void *xsc)
780 {
781 struct lagg_softc *sc = (struct lagg_softc *)xsc;
782 struct lagg_port *lp;
783 struct ifnet *ifp = sc->sc_ifp;
784
785 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
786 return;
787
788 LAGG_WLOCK(sc);
789
790 ifp->if_drv_flags |= IFF_DRV_RUNNING;
791 /* Update the port lladdrs */
792 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
793 lagg_port_lladdr(lp, IF_LLADDR(ifp));
794
795 if (sc->sc_init != NULL)
796 (*sc->sc_init)(sc);
797
798 LAGG_WUNLOCK(sc);
799 }
800
801 static void
802 lagg_stop(struct lagg_softc *sc)
803 {
804 struct ifnet *ifp = sc->sc_ifp;
805
806 LAGG_WLOCK_ASSERT(sc);
807
808 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
809 return;
810
811 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
812
813 if (sc->sc_stop != NULL)
814 (*sc->sc_stop)(sc);
815 }
816
817 static int
818 lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
819 {
820 struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
821 struct lagg_reqall *ra = (struct lagg_reqall *)data;
822 struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf;
823 struct ifreq *ifr = (struct ifreq *)data;
824 struct lagg_port *lp;
825 struct ifnet *tpif;
826 struct thread *td = curthread;
827 char *buf, *outbuf;
828 int count, buflen, len, error = 0;
829
830 bzero(&rpbuf, sizeof(rpbuf));
831
832 switch (cmd) {
833 case SIOCGLAGG:
834 LAGG_RLOCK(sc);
835 count = 0;
836 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
837 count++;
838 buflen = count * sizeof(struct lagg_reqport);
839 LAGG_RUNLOCK(sc);
840
841 outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
842
843 LAGG_RLOCK(sc);
844 ra->ra_proto = sc->sc_proto;
845 if (sc->sc_req != NULL)
846 (*sc->sc_req)(sc, (caddr_t)&ra->ra_psc);
847
848 count = 0;
849 buf = outbuf;
850 len = min(ra->ra_size, buflen);
851 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
852 if (len < sizeof(rpbuf))
853 break;
854
855 lagg_port2req(lp, &rpbuf);
856 memcpy(buf, &rpbuf, sizeof(rpbuf));
857 count++;
858 buf += sizeof(rpbuf);
859 len -= sizeof(rpbuf);
860 }
861 LAGG_RUNLOCK(sc);
862 ra->ra_ports = count;
863 ra->ra_size = count * sizeof(rpbuf);
864 error = copyout(outbuf, ra->ra_port, ra->ra_size);
865 free(outbuf, M_TEMP);
866 break;
867 case SIOCSLAGG:
868 error = suser(td);
869 if (error)
870 break;
871 if (ra->ra_proto >= LAGG_PROTO_MAX) {
872 error = EPROTONOSUPPORT;
873 break;
874 }
875 if (sc->sc_proto != LAGG_PROTO_NONE) {
876 LAGG_WLOCK(sc);
877 error = sc->sc_detach(sc);
878 /* Reset protocol and pointers */
879 sc->sc_proto = LAGG_PROTO_NONE;
880 sc->sc_detach = NULL;
881 sc->sc_start = NULL;
882 sc->sc_input = NULL;
883 sc->sc_port_create = NULL;
884 sc->sc_port_destroy = NULL;
885 sc->sc_linkstate = NULL;
886 sc->sc_init = NULL;
887 sc->sc_stop = NULL;
888 sc->sc_lladdr = NULL;
889 sc->sc_req = NULL;
890 sc->sc_portreq = NULL;
891 LAGG_WUNLOCK(sc);
892 }
893 if (error != 0)
894 break;
895 for (int i = 0; i < (sizeof(lagg_protos) /
896 sizeof(lagg_protos[0])); i++) {
897 if (lagg_protos[i].ti_proto == ra->ra_proto) {
898 if (sc->sc_ifflags & IFF_DEBUG)
899 printf("%s: using proto %u\n",
900 sc->sc_ifname,
901 lagg_protos[i].ti_proto);
902 LAGG_WLOCK(sc);
903 sc->sc_proto = lagg_protos[i].ti_proto;
904 if (sc->sc_proto != LAGG_PROTO_NONE)
905 error = lagg_protos[i].ti_attach(sc);
906 LAGG_WUNLOCK(sc);
907 return (error);
908 }
909 }
910 error = EPROTONOSUPPORT;
911 break;
912 case SIOCGLAGGPORT:
913 if (rp->rp_portname[0] == '\0' ||
914 (tpif = ifunit(rp->rp_portname)) == NULL) {
915 error = EINVAL;
916 break;
917 }
918
919 LAGG_RLOCK(sc);
920 if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
921 lp->lp_softc != sc) {
922 error = ENOENT;
923 LAGG_RUNLOCK(sc);
924 break;
925 }
926
927 lagg_port2req(lp, rp);
928 LAGG_RUNLOCK(sc);
929 break;
930 case SIOCSLAGGPORT:
931 error = suser(td);
932 if (error)
933 break;
934 if (rp->rp_portname[0] == '\0' ||
935 (tpif = ifunit(rp->rp_portname)) == NULL) {
936 error = EINVAL;
937 break;
938 }
939 LAGG_WLOCK(sc);
940 error = lagg_port_create(sc, tpif);
941 LAGG_WUNLOCK(sc);
942 break;
943 case SIOCSLAGGDELPORT:
944 error = suser(td);
945 if (error)
946 break;
947 if (rp->rp_portname[0] == '\0' ||
948 (tpif = ifunit(rp->rp_portname)) == NULL) {
949 error = EINVAL;
950 break;
951 }
952
953 LAGG_WLOCK(sc);
954 if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
955 lp->lp_softc != sc) {
956 error = ENOENT;
957 LAGG_WUNLOCK(sc);
958 break;
959 }
960
961 error = lagg_port_destroy(lp, 1);
962 LAGG_WUNLOCK(sc);
963 break;
964 case SIOCSIFFLAGS:
965 /* Set flags on ports too */
966 LAGG_WLOCK(sc);
967 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
968 lagg_setflags(lp, 1);
969 }
970 LAGG_WUNLOCK(sc);
971
972 if (!(ifp->if_flags & IFF_UP) &&
973 (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
974 /*
975 * If interface is marked down and it is running,
976 * then stop and disable it.
977 */
978 LAGG_WLOCK(sc);
979 lagg_stop(sc);
980 LAGG_WUNLOCK(sc);
981 } else if ((ifp->if_flags & IFF_UP) &&
982 !(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
983 /*
984 * If interface is marked up and it is stopped, then
985 * start it.
986 */
987 (*ifp->if_init)(sc);
988 }
989 break;
990 case SIOCADDMULTI:
991 case SIOCDELMULTI:
992 LAGG_WLOCK(sc);
993 error = lagg_ether_setmulti(sc);
994 LAGG_WUNLOCK(sc);
995 break;
996 case SIOCSIFMEDIA:
997 case SIOCGIFMEDIA:
998 error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
999 break;
1000
1001 case SIOCSIFCAP:
1002 case SIOCSIFMTU:
1003 /* Do not allow the MTU or caps to be directly changed */
1004 error = EINVAL;
1005 break;
1006
1007 default:
1008 error = ether_ioctl(ifp, cmd, data);
1009 break;
1010 }
1011 return (error);
1012 }
1013
1014 static int
1015 lagg_ether_setmulti(struct lagg_softc *sc)
1016 {
1017 struct ifnet *scifp = sc->sc_ifp;
1018 struct ifnet *ifp;
1019 struct ifmultiaddr *ifma, *rifma = NULL;
1020 struct lagg_port *lp;
1021 struct lagg_mc *mc;
1022 struct sockaddr_dl sdl;
1023 int error;
1024
1025 LAGG_WLOCK_ASSERT(sc);
1026
1027 bzero((char *)&sdl, sizeof(sdl));
1028 sdl.sdl_len = sizeof(sdl);
1029 sdl.sdl_family = AF_LINK;
1030 sdl.sdl_type = IFT_ETHER;
1031 sdl.sdl_alen = ETHER_ADDR_LEN;
1032
1033 /* First, remove any existing filter entries. */
1034 lagg_ether_purgemulti(sc);
1035
1036 /* Now program new ones. */
1037 TAILQ_FOREACH(ifma, &scifp->if_multiaddrs, ifma_link) {
1038 if (ifma->ifma_addr->sa_family != AF_LINK)
1039 continue;
1040 mc = malloc(sizeof(struct lagg_mc), M_DEVBUF, M_NOWAIT);
1041 if (mc == NULL)
1042 return (ENOMEM);
1043 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1044 (char *)&mc->mc_addr, ETHER_ADDR_LEN);
1045 SLIST_INSERT_HEAD(&sc->sc_mc_head, mc, mc_entries);
1046 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1047 LLADDR(&sdl), ETHER_ADDR_LEN);
1048
1049 /* do all the ports */
1050 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
1051 ifp = lp->lp_ifp;
1052 sdl.sdl_index = ifp->if_index;
1053 error = if_addmulti(ifp, (struct sockaddr *)&sdl, &rifma);
1054 if (error)
1055 return (error);
1056 }
1057 }
1058 return (0);
1059 }
1060
1061 static int
1062 lagg_ether_cmdmulti(struct lagg_port *lp, int set)
1063 {
1064 struct lagg_softc *sc = lp->lp_softc;
1065 struct ifnet *ifp = lp->lp_ifp;
1066 struct lagg_mc *mc;
1067 struct ifmultiaddr *rifma = NULL;
1068 struct sockaddr_dl sdl;
1069 int error;
1070
1071 LAGG_WLOCK_ASSERT(sc);
1072
1073 bzero((char *)&sdl, sizeof(sdl));
1074 sdl.sdl_len = sizeof(sdl);
1075 sdl.sdl_family = AF_LINK;
1076 sdl.sdl_type = IFT_ETHER;
1077 sdl.sdl_alen = ETHER_ADDR_LEN;
1078 sdl.sdl_index = ifp->if_index;
1079
1080 SLIST_FOREACH(mc, &sc->sc_mc_head, mc_entries) {
1081 bcopy((char *)&mc->mc_addr, LLADDR(&sdl), ETHER_ADDR_LEN);
1082
1083 if (set)
1084 error = if_addmulti(ifp, (struct sockaddr *)&sdl, &rifma);
1085 else
1086 error = if_delmulti(ifp, (struct sockaddr *)&sdl);
1087
1088 if (error) {
1089 printf("cmdmulti error on %s, set = %d\n",
1090 ifp->if_xname, set);
1091 return (error);
1092 }
1093 }
1094 return (0);
1095 }
1096
1097 static void
1098 lagg_ether_purgemulti(struct lagg_softc *sc)
1099 {
1100 struct lagg_port *lp;
1101 struct lagg_mc *mc;
1102
1103 LAGG_WLOCK_ASSERT(sc);
1104
1105 /* remove from ports */
1106 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
1107 lagg_ether_cmdmulti(lp, 0);
1108
1109 while ((mc = SLIST_FIRST(&sc->sc_mc_head)) != NULL) {
1110 SLIST_REMOVE(&sc->sc_mc_head, mc, lagg_mc, mc_entries);
1111 free(mc, M_DEVBUF);
1112 }
1113 }
1114
1115 /* Handle a ref counted flag that should be set on the lagg port as well */
1116 static int
1117 lagg_setflag(struct lagg_port *lp, int flag, int status,
1118 int (*func)(struct ifnet *, int))
1119 {
1120 struct lagg_softc *sc = lp->lp_softc;
1121 struct ifnet *scifp = sc->sc_ifp;
1122 struct ifnet *ifp = lp->lp_ifp;
1123 int error;
1124
1125 LAGG_WLOCK_ASSERT(sc);
1126
1127 status = status ? (scifp->if_flags & flag) : 0;
1128 /* Now "status" contains the flag value or 0 */
1129
1130 /*
1131 * See if the recorded port status differs from what we want it
1132 * to be. If it does, flip it. We record the port status in
1133 * lp_ifflags so that we never clear a port flag we haven't set.
1134 * In fact, we don't clear or set port flags directly, but
1135 * acquire or release references to them (e.g. via ifpromisc()),
1136 * so we can be sure that the recorded flags stay in accord
1137 * with the actual port flags.
1138 */
1139 if (status != (lp->lp_ifflags & flag)) {
1140 error = (*func)(ifp, status);
1141 if (error)
1142 return (error);
1143 lp->lp_ifflags &= ~flag;
1144 lp->lp_ifflags |= status;
1145 }
1146 return (0);
1147 }
1148
1149 /*
1150 * Handle IFF_* flags that require certain changes on the lagg port:
1151 * if "status" is true, update the port's flags to match those of the lagg;
1152 * if "status" is false, forcibly clear the flags set on the port.
1153 */
1154 static int
1155 lagg_setflags(struct lagg_port *lp, int status)
1156 {
1157 int error, i;
1158
1159 for (i = 0; lagg_pflags[i].flag; i++) {
1160 error = lagg_setflag(lp, lagg_pflags[i].flag,
1161 status, lagg_pflags[i].func);
1162 if (error)
1163 return (error);
1164 }
1165 return (0);
1166 }
1167
1168 static void
1169 lagg_start(struct ifnet *ifp)
1170 {
1171 struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
1172 struct mbuf *m;
1173 int error = 0;
1174
1175 LAGG_RLOCK(sc);
1176 for (;; error = 0) {
1177 IFQ_DEQUEUE(&ifp->if_snd, m);
1178 if (m == NULL)
1179 break;
1180
1181 BPF_MTAP(ifp, m);
1182
1183 if (sc->sc_proto != LAGG_PROTO_NONE)
1184 error = (*sc->sc_start)(sc, m);
1185 else
1186 m_freem(m);
1187
1188 if (error == 0)
1189 ifp->if_opackets++;
1190 else
1191 ifp->if_oerrors++;
1192 }
1193 LAGG_RUNLOCK(sc);
1194
1195 return;
1196 }
1197
1198 static struct mbuf *
1199 lagg_input(struct ifnet *ifp, struct mbuf *m)
1200 {
1201 struct lagg_port *lp = ifp->if_lagg;
1202 struct lagg_softc *sc = lp->lp_softc;
1203 struct ifnet *scifp = sc->sc_ifp;
1204
1205 if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
1206 (lp->lp_flags & LAGG_PORT_DISABLED) ||
1207 sc->sc_proto == LAGG_PROTO_NONE) {
1208 m_freem(m);
1209 return (NULL);
1210 }
1211
1212 LAGG_RLOCK(sc);
1213 BPF_MTAP(scifp, m);
1214
1215 m = (*sc->sc_input)(sc, lp, m);
1216
1217 if (m != NULL) {
1218 scifp->if_ipackets++;
1219 scifp->if_ibytes += m->m_pkthdr.len;
1220 }
1221
1222 LAGG_RUNLOCK(sc);
1223 return (m);
1224 }
1225
1226 static int
1227 lagg_media_change(struct ifnet *ifp)
1228 {
1229 struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
1230
1231 if (sc->sc_ifflags & IFF_DEBUG)
1232 printf("%s\n", __func__);
1233
1234 /* Ignore */
1235 return (0);
1236 }
1237
1238 static void
1239 lagg_media_status(struct ifnet *ifp, struct ifmediareq *imr)
1240 {
1241 struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
1242 struct lagg_port *lp;
1243
1244 imr->ifm_status = IFM_AVALID;
1245 imr->ifm_active = IFM_ETHER | IFM_AUTO;
1246
1247 LAGG_RLOCK(sc);
1248 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
1249 if (LAGG_PORTACTIVE(lp))
1250 imr->ifm_status |= IFM_ACTIVE;
1251 }
1252 LAGG_RUNLOCK(sc);
1253 }
1254
1255 static void
1256 lagg_linkstate(struct lagg_softc *sc)
1257 {
1258 struct lagg_port *lp;
1259 int new_link = LINK_STATE_DOWN;
1260
1261 /* Our link is considered up if at least one of our ports is active */
1262 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
1263 if (lp->lp_link_state == LINK_STATE_UP) {
1264 new_link = LINK_STATE_UP;
1265 break;
1266 }
1267 }
1268 if_link_state_change(sc->sc_ifp, new_link);
1269 }
1270
1271 static void
1272 lagg_port_state(struct ifnet *ifp, int state)
1273 {
1274 struct lagg_port *lp = (struct lagg_port *)ifp->if_lagg;
1275 struct lagg_softc *sc = NULL;
1276
1277 if (lp != NULL)
1278 sc = lp->lp_softc;
1279 if (sc == NULL)
1280 return;
1281
1282 LAGG_WLOCK(sc);
1283 lagg_linkstate(sc);
1284 if (sc->sc_linkstate != NULL)
1285 (*sc->sc_linkstate)(lp);
1286 LAGG_WUNLOCK(sc);
1287 }
1288
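/*
 * Return a port with an active link, preferring "lp" (or the port following
 * it) when one is given; fall back to the first active port on the lagg, or
 * NULL if no link is up.
 */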
1289 struct lagg_port *
1290 lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp)
1291 {
1292 struct lagg_port *lp_next, *rval = NULL;
1293 // int new_link = LINK_STATE_DOWN;
1294
1295 LAGG_RLOCK_ASSERT(sc);
1296 /*
1297 * Search a port which reports an active link state.
1298 */
1299
1300 if (lp == NULL)
1301 goto search;
1302 if (LAGG_PORTACTIVE(lp)) {
1303 rval = lp;
1304 goto found;
1305 }
1306 if ((lp_next = SLIST_NEXT(lp, lp_entries)) != NULL &&
1307 LAGG_PORTACTIVE(lp_next)) {
1308 rval = lp_next;
1309 goto found;
1310 }
1311
1312 search:
1313 SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
1314 if (LAGG_PORTACTIVE(lp_next)) {
1315 rval = lp_next;
1316 goto found;
1317 }
1318 }
1319
1320 found:
1321 if (rval != NULL) {
1322 /*
1323 * The IEEE 802.1D standard assumes that a lagg with
1324 * multiple ports is always full duplex. This is valid
1325 * for load sharing laggs and if at least two links
1326 * are active. Unfortunately, checking the latter would
1327 * be too expensive at this point.
1328 XXX
1329 if ((sc->sc_capabilities & IFCAP_LAGG_FULLDUPLEX) &&
1330 (sc->sc_count > 1))
1331 new_link = LINK_STATE_FULL_DUPLEX;
1332 else
1333 new_link = rval->lp_link_state;
1334 */
1335 }
1336
1337 return (rval);
1338 }
1339
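/*
 * Return a pointer to "len" contiguous bytes at offset "off" into the mbuf
 * chain.  If the region spans mbufs it is copied into the caller-supplied
 * buffer; NULL is returned if the packet is too short.
 */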
1340 static const void *
1341 lagg_gethdr(struct mbuf *m, u_int off, u_int len, void *buf)
1342 {
1343 if (m->m_pkthdr.len < (off + len)) {
1344 return (NULL);
1345 } else if (m->m_len < (off + len)) {
1346 m_copydata(m, off, len, buf);
1347 return (buf);
1348 }
1349 return (mtod(m, char *) + off);
1350 }
1351
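/*
 * Compute a 32-bit flow hash over the Ethernet header, any VLAN tag and,
 * for IPv4/IPv6 packets, the source and destination addresses (plus the
 * IPv6 flow label), seeded with "key".  The loadbalance code uses this to
 * pick a transmit port for a given flow (see lagg_lb_start()).
 */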
1352 uint32_t
1353 lagg_hashmbuf(struct mbuf *m, uint32_t key)
1354 {
1355 uint16_t etype;
1356 uint32_t p = 0;
1357 int off;
1358 struct ether_header *eh;
1359 struct ether_vlan_header vlanbuf;
1360 const struct ether_vlan_header *vlan;
1361 struct m_tag *mtag;
1362 u_int tag;
1363 #ifdef INET
1364 const struct ip *ip;
1365 struct ip ipbuf;
1366 #endif
1367 #ifdef INET6
1368 const struct ip6_hdr *ip6;
1369 struct ip6_hdr ip6buf;
1370 uint32_t flow;
1371 #endif
1372
1373 off = sizeof(*eh);
1374 if (m->m_len < off)
1375 goto out;
1376 eh = mtod(m, struct ether_header *);
1377 etype = ntohs(eh->ether_type);
1378 p = hash32_buf(&eh->ether_shost, ETHER_ADDR_LEN, key);
1379 p = hash32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);
1380
1381 /* Special handling for encapsulating VLAN frames */
1382 if (m->m_flags & M_VLANTAG) {
1383 mtag = m_tag_locate(m, MTAG_VLAN, MTAG_VLAN_TAG, NULL);
1384 KASSERT(mtag != NULL,
1385 ("%s: M_VLANTAG without m_tag", __func__));
1386 tag = EVL_VLANOFTAG(VLAN_TAG_VALUE(mtag));
1387 p = hash32_buf(&tag, sizeof(tag), p);
1388 } else if (etype == ETHERTYPE_VLAN) {
1389 vlan = lagg_gethdr(m, off, sizeof(*vlan), &vlanbuf);
1390 if (vlan == NULL)
1391 goto out;
1392
1393 p = hash32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p);
1394 etype = ntohs(vlan->evl_proto);
1395 off += sizeof(*vlan) - sizeof(*eh);
1396 }
1397
1398 switch (etype) {
1399 #ifdef INET
1400 case ETHERTYPE_IP:
1401 ip = lagg_gethdr(m, off, sizeof(*ip), &ipbuf);
1402 if (ip == NULL)
1403 goto out;
1404
1405 p = hash32_buf(&ip->ip_src, sizeof(struct in_addr), p);
1406 p = hash32_buf(&ip->ip_dst, sizeof(struct in_addr), p);
1407 break;
1408 #endif
1409 #ifdef INET6
1410 case ETHERTYPE_IPV6:
1411 ip6 = lagg_gethdr(m, off, sizeof(*ip6), &ip6buf);
1412 if (ip6 == NULL)
1413 goto out;
1414
1415 p = hash32_buf(&ip6->ip6_src, sizeof(struct in6_addr), p);
1416 p = hash32_buf(&ip6->ip6_dst, sizeof(struct in6_addr), p);
1417 flow = ip6->ip6_flow & IPV6_FLOWLABEL_MASK;
1418 p = hash32_buf(&flow, sizeof(flow), p); /* IPv6 flow label */
1419 break;
1420 #endif
1421 }
1422 out:
1423 return (p);
1424 }
1425
1426 int
1427 lagg_enqueue(struct ifnet *ifp, struct mbuf *m)
1428 {
1429 int error = 0;
1430
1431 IFQ_HANDOFF(ifp, m, error);
1432 if (error)
1433 ifp->if_oerrors++;
1434 return (error);
1435 }
1436
1437 /*
1438 * Simple round robin aggregation
1439 */
1440
1441 static int
1442 lagg_rr_attach(struct lagg_softc *sc)
1443 {
1444 sc->sc_detach = lagg_rr_detach;
1445 sc->sc_start = lagg_rr_start;
1446 sc->sc_input = lagg_rr_input;
1447 sc->sc_port_create = NULL;
1448 sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX;
1449 sc->sc_seq = 0;
1450
1451 return (0);
1452 }
1453
1454 static int
1455 lagg_rr_detach(struct lagg_softc *sc)
1456 {
1457 return (0);
1458 }
1459
1460 static int
1461 lagg_rr_start(struct lagg_softc *sc, struct mbuf *m)
1462 {
1463 struct lagg_port *lp;
1464 uint32_t p;
1465
1466 p = atomic_fetchadd_32(&sc->sc_seq, 1);
1467 p %= sc->sc_count;
1468 lp = SLIST_FIRST(&sc->sc_ports);
1469 while (p--)
1470 lp = SLIST_NEXT(lp, lp_entries);
1471
1472 /*
1473 * Check the port's link state. This will return the next active
1474 * port if the link is down or the port is NULL.
1475 */
1476 if ((lp = lagg_link_active(sc, lp)) == NULL) {
1477 m_freem(m);
1478 return (ENOENT);
1479 }
1480
1481 /* Send mbuf */
1482 return (lagg_enqueue(lp->lp_ifp, m));
1483 }
1484
1485 static struct mbuf *
1486 lagg_rr_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
1487 {
1488 struct ifnet *ifp = sc->sc_ifp;
1489
1490 /* Just pass in the packet to our lagg device */
1491 m->m_pkthdr.rcvif = ifp;
1492
1493 return (m);
1494 }
1495
1496 /*
1497 * Active failover
1498 */
1499
1500 static int
1501 lagg_fail_attach(struct lagg_softc *sc)
1502 {
1503 sc->sc_detach = lagg_fail_detach;
1504 sc->sc_start = lagg_fail_start;
1505 sc->sc_input = lagg_fail_input;
1506 sc->sc_port_create = NULL;
1507 sc->sc_port_destroy = NULL;
1508
1509 return (0);
1510 }
1511
1512 static int
1513 lagg_fail_detach(struct lagg_softc *sc)
1514 {
1515 return (0);
1516 }
1517
1518 static int
1519 lagg_fail_start(struct lagg_softc *sc, struct mbuf *m)
1520 {
1521 struct lagg_port *lp;
1522
1523 /* Use the master port if active or the next available port */
1524 if ((lp = lagg_link_active(sc, sc->sc_primary)) == NULL) {
1525 m_freem(m);
1526 return (ENOENT);
1527 }
1528
1529 /* Send mbuf */
1530 return (lagg_enqueue(lp->lp_ifp, m));
1531 }
1532
1533 static struct mbuf *
1534 lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
1535 {
1536 struct ifnet *ifp = sc->sc_ifp;
1537 struct lagg_port *tmp_tp;
1538
1539 if (lp == sc->sc_primary) {
1540 m->m_pkthdr.rcvif = ifp;
1541 return (m);
1542 }
1543
1544 if (!LAGG_PORTACTIVE(sc->sc_primary)) {
1545 tmp_tp = lagg_link_active(sc, sc->sc_primary);
1546 /*
1547 * If tmp_tp is null, we've received a packet when all
1548 * our links are down. Weird, but process it anyway.
1549 */
1550 if ((tmp_tp == NULL || tmp_tp == lp)) {
1551 m->m_pkthdr.rcvif = ifp;
1552 return (m);
1553 }
1554 }
1555
1556 m_freem(m);
1557 return (NULL);
1558 }
1559
1560 /*
1561 * Loadbalancing
1562 */
1563
1564 static int
1565 lagg_lb_attach(struct lagg_softc *sc)
1566 {
1567 struct lagg_port *lp;
1568 struct lagg_lb *lb;
1569
1570 if ((lb = (struct lagg_lb *)malloc(sizeof(struct lagg_lb),
1571 M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
1572 return (ENOMEM);
1573
1574 sc->sc_detach = lagg_lb_detach;
1575 sc->sc_start = lagg_lb_start;
1576 sc->sc_input = lagg_lb_input;
1577 sc->sc_port_create = lagg_lb_port_create;
1578 sc->sc_port_destroy = lagg_lb_port_destroy;
1579 sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX;
1580
1581 lb->lb_key = arc4random();
1582 sc->sc_psc = (caddr_t)lb;
1583
1584 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
1585 lagg_lb_port_create(lp);
1586
1587 return (0);
1588 }
1589
1590 static int
1591 lagg_lb_detach(struct lagg_softc *sc)
1592 {
1593 struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
1594 if (lb != NULL)
1595 free(lb, M_DEVBUF);
1596 return (0);
1597 }
1598
1599 static int
1600 lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp)
1601 {
1602 struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
1603 struct lagg_port *lp_next;
1604 int i = 0;
1605
1606 bzero(&lb->lb_ports, sizeof(lb->lb_ports));
1607 SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
1608 if (lp_next == lp)
1609 continue;
1610 if (i >= LAGG_MAX_PORTS)
1611 return (EINVAL);
1612 if (sc->sc_ifflags & IFF_DEBUG)
1613 printf("%s: port %s at index %d\n",
1614 sc->sc_ifname, lp_next->lp_ifname, i);
1615 lb->lb_ports[i++] = lp_next;
1616 }
1617
1618 return (0);
1619 }
1620
1621 static int
1622 lagg_lb_port_create(struct lagg_port *lp)
1623 {
1624 struct lagg_softc *sc = lp->lp_softc;
1625 return (lagg_lb_porttable(sc, NULL));
1626 }
1627
1628 static void
1629 lagg_lb_port_destroy(struct lagg_port *lp)
1630 {
1631 struct lagg_softc *sc = lp->lp_softc;
1632 lagg_lb_porttable(sc, lp);
1633 }
1634
1635 static int
1636 lagg_lb_start(struct lagg_softc *sc, struct mbuf *m)
1637 {
1638 struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
1639 struct lagg_port *lp = NULL;
1640 uint32_t p = 0;
1641 int idx;
1642
1643 p = lagg_hashmbuf(m, lb->lb_key);
1644 if ((idx = p % sc->sc_count) >= LAGG_MAX_PORTS) {
1645 m_freem(m);
1646 return (EINVAL);
1647 }
1648 lp = lb->lb_ports[idx];
1649
1650 /*
1651 * Check the port's link state. This will return the next active
1652 * port if the link is down or the port is NULL.
1653 */
1654 if ((lp = lagg_link_active(sc, lp)) == NULL) {
1655 m_freem(m);
1656 return (ENOENT);
1657 }
1658
1659 /* Send mbuf */
1660 return (lagg_enqueue(lp->lp_ifp, m));
1661 }
1662
1663 static struct mbuf *
1664 lagg_lb_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
1665 {
1666 struct ifnet *ifp = sc->sc_ifp;
1667
1668 /* Just pass in the packet to our lagg device */
1669 m->m_pkthdr.rcvif = ifp;
1670
1671 return (m);
1672 }
1673
1674 /*
1675 * 802.3ad LACP
1676 */
1677
1678 static int
1679 lagg_lacp_attach(struct lagg_softc *sc)
1680 {
1681 struct lagg_port *lp;
1682 int error;
1683
1684 sc->sc_detach = lagg_lacp_detach;
1685 sc->sc_port_create = lacp_port_create;
1686 sc->sc_port_destroy = lacp_port_destroy;
1687 sc->sc_linkstate = lacp_linkstate;
1688 sc->sc_start = lagg_lacp_start;
1689 sc->sc_input = lagg_lacp_input;
1690 sc->sc_init = lacp_init;
1691 sc->sc_stop = lacp_stop;
1692 sc->sc_lladdr = lagg_lacp_lladdr;
1693 sc->sc_req = lacp_req;
1694 sc->sc_portreq = lacp_portreq;
1695
1696 error = lacp_attach(sc);
1697 if (error)
1698 return (error);
1699
1700 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
1701 lacp_port_create(lp);
1702
1703 return (error);
1704 }
1705
1706 static int
1707 lagg_lacp_detach(struct lagg_softc *sc)
1708 {
1709 struct lagg_port *lp;
1710 int error;
1711
1712 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
1713 lacp_port_destroy(lp);
1714
1715 /* unlocking is safe here */
1716 LAGG_WUNLOCK(sc);
1717 error = lacp_detach(sc);
1718 LAGG_WLOCK(sc);
1719
1720 return (error);
1721 }
1722
1723 static void
1724 lagg_lacp_lladdr(struct lagg_softc *sc)
1725 {
1726 struct lagg_port *lp;
1727
1728 /* purge all the lacp ports */
1729 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
1730 lacp_port_destroy(lp);
1731
1732 /* add them back in */
1733 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
1734 lacp_port_create(lp);
1735 }
1736
1737 static int
1738 lagg_lacp_start(struct lagg_softc *sc, struct mbuf *m)
1739 {
1740 struct lagg_port *lp;
1741
1742 lp = lacp_select_tx_port(sc, m);
1743 if (lp == NULL) {
1744 m_freem(m);
1745 return (EBUSY);
1746 }
1747
1748 /* Send mbuf */
1749 return (lagg_enqueue(lp->lp_ifp, m));
1750 }
1751
1752 static struct mbuf *
1753 lagg_lacp_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
1754 {
1755 struct ifnet *ifp = sc->sc_ifp;
1756 struct ether_header *eh;
1757 u_short etype;
1758
1759 eh = mtod(m, struct ether_header *);
1760 etype = ntohs(eh->ether_type);
1761
1762 /* Tap off LACP control messages */
1763 if (etype == ETHERTYPE_SLOW) {
1764 lacp_input(lp, m);
1765 return (NULL);
1766 }
1767
1768 /*
1769 * If the port is not collecting or not in the active aggregator then
1770 * free and return.
1771 */
1772 if ((lp->lp_flags & LAGG_PORT_COLLECTING) == 0 ||
1773 lacp_port_isactive(lp) == 0) {
1774 m_freem(m);
1775 return (NULL);
1776 }
1777
1778 m->m_pkthdr.rcvif = ifp;
1779 return (m);
1780 }