FreeBSD/Linux Kernel Cross Reference
sys/net/if_lagg.c
1 /* $OpenBSD: if_trunk.c,v 1.30 2007/01/31 06:20:19 reyk Exp $ */
2
3 /*
4 * Copyright (c) 2005, 2006 Reyk Floeter <reyk@openbsd.org>
5 * Copyright (c) 2007 Andrew Thompson <thompsa@FreeBSD.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19
20 #include <sys/cdefs.h>
21 __FBSDID("$FreeBSD$");
22
23 #include "opt_inet.h"
24 #include "opt_inet6.h"
25
26 #include <sys/param.h>
27 #include <sys/kernel.h>
28 #include <sys/malloc.h>
29 #include <sys/mbuf.h>
30 #include <sys/queue.h>
31 #include <sys/socket.h>
32 #include <sys/sockio.h>
33 #include <sys/sysctl.h>
34 #include <sys/module.h>
35 #include <sys/priv.h>
36 #include <sys/systm.h>
37 #include <sys/proc.h>
38 #include <sys/hash.h>
39 #include <sys/lock.h>
40 #include <sys/rwlock.h>
41 #include <sys/taskqueue.h>
42 #include <sys/eventhandler.h>
43
44 #include <net/ethernet.h>
45 #include <net/if.h>
46 #include <net/if_clone.h>
47 #include <net/if_arp.h>
48 #include <net/if_dl.h>
49 #include <net/if_llc.h>
50 #include <net/if_media.h>
51 #include <net/if_types.h>
52 #include <net/if_var.h>
53 #include <net/bpf.h>
54
55 #ifdef INET
56 #include <netinet/in.h>
57 #include <netinet/in_systm.h>
58 #include <netinet/if_ether.h>
59 #include <netinet/ip.h>
60 #endif
61
62 #ifdef INET6
63 #include <netinet/ip6.h>
64 #endif
65
66 #include <net/if_vlan_var.h>
67 #include <net/if_lagg.h>
68 #include <net/ieee8023ad_lacp.h>
69
70 /* Special flags we should propagate to the lagg ports. */
/*
 * Each entry pairs an IFF_* interface flag with a routine of the
 * int (*)(struct ifnet *, int) shape that lagg_setflag() accepts as its
 * `func' argument.  The table is terminated by a {0, NULL} sentinel.
 */
71 static struct {
72 int flag;
73 int (*func)(struct ifnet *, int);
74 } lagg_pflags[] = {
75 {IFF_PROMISC, ifpromisc},
76 {IFF_ALLMULTI, if_allmulti},
77 {0, NULL}
78 };
79
/*
 * Global state.  lagg_list links every lagg softc; it is walked and
 * modified with lagg_list_mtx held.  lagg_detach_cookie stores the tag
 * returned when lagg_modevent() registers for ifnet_departure_event so
 * that the handler can be deregistered on unload.
 */
80 SLIST_HEAD(__trhead, lagg_softc) lagg_list; /* list of laggs */
81 static struct mtx lagg_list_mtx;
82 eventhandler_tag lagg_detach_cookie = NULL;
83
/*
 * Forward declarations for the file-local clone, port, ioctl, multicast,
 * flag and media routines.
 */
84 static int lagg_clone_create(struct if_clone *, int, caddr_t);
85 static void lagg_clone_destroy(struct ifnet *);
86 static void lagg_lladdr(struct lagg_softc *, uint8_t *);
87 static void lagg_capabilities(struct lagg_softc *);
88 static void lagg_port_lladdr(struct lagg_port *, uint8_t *);
89 static void lagg_port_setlladdr(void *, int);
90 static int lagg_port_create(struct lagg_softc *, struct ifnet *);
91 static int lagg_port_destroy(struct lagg_port *, int);
92 static struct mbuf *lagg_input(struct ifnet *, struct mbuf *);
93 static void lagg_linkstate(struct lagg_softc *);
94 static void lagg_port_state(struct ifnet *, int);
95 static int lagg_port_ioctl(struct ifnet *, u_long, caddr_t);
96 static int lagg_port_output(struct ifnet *, struct mbuf *,
97 struct sockaddr *, struct route *);
98 static void lagg_port_ifdetach(void *arg __unused, struct ifnet *);
99 static int lagg_port_checkstacking(struct lagg_softc *);
100 static void lagg_port2req(struct lagg_port *, struct lagg_reqport *);
101 static void lagg_init(void *);
102 static void lagg_stop(struct lagg_softc *);
103 static int lagg_ioctl(struct ifnet *, u_long, caddr_t);
104 static int lagg_ether_setmulti(struct lagg_softc *);
105 static int lagg_ether_cmdmulti(struct lagg_port *, int);
106 static int lagg_setflag(struct lagg_port *, int, int,
107 int (*func)(struct ifnet *, int));
108 static int lagg_setflags(struct lagg_port *, int status);
109 static void lagg_start(struct ifnet *);
110 static int lagg_media_change(struct ifnet *);
111 static void lagg_media_status(struct ifnet *, struct ifmediareq *);
112 static struct lagg_port *lagg_link_active(struct lagg_softc *,
113 struct lagg_port *);
114 static const void *lagg_gethdr(struct mbuf *, u_int, u_int, void *);
115
/*
 * NOTE(review): presumably this defines the lagg_cloner object that
 * lagg_modevent() attaches and detaches, bound to lagg_clone_create() and
 * lagg_clone_destroy() -- confirm against net/if_clone.h.
 */
116 IFC_SIMPLE_DECLARE(lagg, 0);
117
/*
 * Per-protocol handlers.  The *_attach routines are the ones registered in
 * the lagg_protos[] table below; the remaining handlers are declared here
 * and defined later in the file.
 */
118 /* Simple round robin */
119 static int lagg_rr_attach(struct lagg_softc *);
120 static int lagg_rr_detach(struct lagg_softc *);
121 static int lagg_rr_start(struct lagg_softc *, struct mbuf *);
122 static struct mbuf *lagg_rr_input(struct lagg_softc *, struct lagg_port *,
123 struct mbuf *);
124
125 /* Active failover */
126 static int lagg_fail_attach(struct lagg_softc *);
127 static int lagg_fail_detach(struct lagg_softc *);
128 static int lagg_fail_start(struct lagg_softc *, struct mbuf *);
129 static struct mbuf *lagg_fail_input(struct lagg_softc *, struct lagg_port *,
130 struct mbuf *);
131
132 /* Loadbalancing */
133 static int lagg_lb_attach(struct lagg_softc *);
134 static int lagg_lb_detach(struct lagg_softc *);
135 static int lagg_lb_port_create(struct lagg_port *);
136 static void lagg_lb_port_destroy(struct lagg_port *);
137 static int lagg_lb_start(struct lagg_softc *, struct mbuf *);
138 static struct mbuf *lagg_lb_input(struct lagg_softc *, struct lagg_port *,
139 struct mbuf *);
140 static int lagg_lb_porttable(struct lagg_softc *, struct lagg_port *);
141
142 /* 802.3ad LACP */
143 static int lagg_lacp_attach(struct lagg_softc *);
144 static int lagg_lacp_detach(struct lagg_softc *);
145 static int lagg_lacp_start(struct lagg_softc *, struct mbuf *);
146 static struct mbuf *lagg_lacp_input(struct lagg_softc *, struct lagg_port *,
147 struct mbuf *);
148 static void lagg_lacp_lladdr(struct lagg_softc *);
148 static void lagg_lacp_lladdr(struct lagg_softc *);
149
150 /* lagg protocol table */
/*
 * Maps a LAGG_PROTO_* identifier to the routine that attaches that
 * protocol to a softc.  lagg_clone_create() scans it for
 * LAGG_PROTO_DEFAULT and the SIOCSLAGG handler in lagg_ioctl() scans it for
 * the requested protocol.  LOADBALANCE and ETHERCHANNEL share one attach
 * routine.  The final LAGG_PROTO_NONE entry has no attach routine and also
 * terminates the scan in lagg_clone_create().
 */
151 static const struct {
152 int ti_proto;
153 int (*ti_attach)(struct lagg_softc *);
154 } lagg_protos[] = {
155 { LAGG_PROTO_ROUNDROBIN, lagg_rr_attach },
156 { LAGG_PROTO_FAILOVER, lagg_fail_attach },
157 { LAGG_PROTO_LOADBALANCE, lagg_lb_attach },
158 { LAGG_PROTO_ETHERCHANNEL, lagg_lb_attach },
159 { LAGG_PROTO_LACP, lagg_lacp_attach },
160 { LAGG_PROTO_NONE, NULL }
161 };
162
/*
 * net.link.lagg sysctl tree.  Per-instance child nodes are added under it
 * by lagg_clone_create().
 */
163 SYSCTL_DECL(_net_link);
164 SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0, "Link Aggregation");
165
166 static int lagg_failover_rx_all = 0; /* Allow input on any failover links */
167 SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW,
168 &lagg_failover_rx_all, 0,
169 "Accept input from any interface in a failover lagg");
/*
 * def_use_flowid is copied into sc->use_flowid when a lagg is created; it
 * is also exposed as a loader tunable.
 */
170 static int def_use_flowid = 1; /* Default value for using M_FLOWID */
171 TUNABLE_INT("net.link.lagg.default_use_flowid", &def_use_flowid);
172 SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RW,
173 &def_use_flowid, 0,
174 "Default setting for using flow id for load sharing");
175
176 static int
177 lagg_modevent(module_t mod, int type, void *data)
178 {
179
180 switch (type) {
181 case MOD_LOAD:
182 mtx_init(&lagg_list_mtx, "if_lagg list", NULL, MTX_DEF);
183 SLIST_INIT(&lagg_list);
184 if_clone_attach(&lagg_cloner);
185 lagg_input_p = lagg_input;
186 lagg_linkstate_p = lagg_port_state;
187 lagg_detach_cookie = EVENTHANDLER_REGISTER(
188 ifnet_departure_event, lagg_port_ifdetach, NULL,
189 EVENTHANDLER_PRI_ANY);
190 break;
191 case MOD_UNLOAD:
192 EVENTHANDLER_DEREGISTER(ifnet_departure_event,
193 lagg_detach_cookie);
194 if_clone_detach(&lagg_cloner);
195 lagg_input_p = NULL;
196 lagg_linkstate_p = NULL;
197 mtx_destroy(&lagg_list_mtx);
198 break;
199 default:
200 return (EOPNOTSUPP);
201 }
202 return (0);
203 }
204
/* Module glue: name, event handler (lagg_modevent) and no private data. */
205 static moduledata_t lagg_mod = {
206 "if_lagg",
207 lagg_modevent,
208 0
209 };
210
211 DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
212 MODULE_VERSION(if_lagg, 1);
213
214 #if __FreeBSD_version >= 800000
215 /*
216 * This routine is run via an vlan
217 * config EVENT
218 */
219 static void
220 lagg_register_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
221 {
222 struct lagg_softc *sc = ifp->if_softc;
223 struct lagg_port *lp;
224
225 if (ifp->if_softc != arg) /* Not our event */
226 return;
227
228 LAGG_RLOCK(sc);
229 if (!SLIST_EMPTY(&sc->sc_ports)) {
230 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
231 EVENTHANDLER_INVOKE(vlan_config, lp->lp_ifp, vtag);
232 }
233 LAGG_RUNLOCK(sc);
234 }
235
236 /*
237 * This routine is run via an vlan
238 * unconfig EVENT
239 */
240 static void
241 lagg_unregister_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
242 {
243 struct lagg_softc *sc = ifp->if_softc;
244 struct lagg_port *lp;
245
246 if (ifp->if_softc != arg) /* Not our event */
247 return;
248
249 LAGG_RLOCK(sc);
250 if (!SLIST_EMPTY(&sc->sc_ports)) {
251 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
252 EVENTHANDLER_INVOKE(vlan_unconfig, lp->lp_ifp, vtag);
253 }
254 LAGG_RUNLOCK(sc);
255 }
256 #endif
257
258 static int
259 lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
260 {
261 struct lagg_softc *sc;
262 struct ifnet *ifp;
263 int i, error = 0;
264 static const u_char eaddr[6]; /* 00:00:00:00:00:00 */
265 struct sysctl_oid *oid;
266 char num[14]; /* sufficient for 32 bits */
267
268 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
269 ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
270 if (ifp == NULL) {
271 free(sc, M_DEVBUF);
272 return (ENOSPC);
273 }
274
275 sysctl_ctx_init(&sc->ctx);
276 snprintf(num, sizeof(num), "%u", unit);
277 sc->use_flowid = def_use_flowid;
278 oid = SYSCTL_ADD_NODE(&sc->ctx, &SYSCTL_NODE_CHILDREN(_net_link, lagg),
279 OID_AUTO, num, CTLFLAG_RD, NULL, "");
280 SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
281 "use_flowid", CTLTYPE_INT|CTLFLAG_RW, &sc->use_flowid, sc->use_flowid,
282 "Use flow id for load sharing");
283 /* Hash all layers by default */
284 sc->sc_flags = LAGG_F_HASHL2|LAGG_F_HASHL3|LAGG_F_HASHL4;
285
286 sc->sc_proto = LAGG_PROTO_NONE;
287 for (i = 0; lagg_protos[i].ti_proto != LAGG_PROTO_NONE; i++) {
288 if (lagg_protos[i].ti_proto == LAGG_PROTO_DEFAULT) {
289 sc->sc_proto = lagg_protos[i].ti_proto;
290 if ((error = lagg_protos[i].ti_attach(sc)) != 0) {
291 if_free_type(ifp, IFT_ETHER);
292 free(sc, M_DEVBUF);
293 return (error);
294 }
295 break;
296 }
297 }
298 LAGG_LOCK_INIT(sc);
299 SLIST_INIT(&sc->sc_ports);
300 TASK_INIT(&sc->sc_lladdr_task, 0, lagg_port_setlladdr, sc);
301
302 /* Initialise pseudo media types */
303 ifmedia_init(&sc->sc_media, 0, lagg_media_change,
304 lagg_media_status);
305 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
306 ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
307
308 if_initname(ifp, ifc->ifc_name, unit);
309 ifp->if_type = IFT_ETHER;
310 ifp->if_softc = sc;
311 ifp->if_start = lagg_start;
312 ifp->if_init = lagg_init;
313 ifp->if_ioctl = lagg_ioctl;
314 ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
315
316 IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
317 ifp->if_snd.ifq_drv_maxlen = ifqmaxlen;
318 IFQ_SET_READY(&ifp->if_snd);
319
320 /*
321 * Attach as an ordinary ethernet device, childs will be attached
322 * as special device IFT_IEEE8023ADLAG.
323 */
324 ether_ifattach(ifp, eaddr);
325
326 #if __FreeBSD_version >= 800000
327 sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
328 lagg_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
329 sc->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
330 lagg_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
331 #endif
332
333 /* Insert into the global list of laggs */
334 mtx_lock(&lagg_list_mtx);
335 SLIST_INSERT_HEAD(&lagg_list, sc, sc_entries);
336 mtx_unlock(&lagg_list_mtx);
337
338 return (0);
339 }
340
341 static void
342 lagg_clone_destroy(struct ifnet *ifp)
343 {
344 struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
345 struct lagg_port *lp;
346
347 LAGG_WLOCK(sc);
348
349 lagg_stop(sc);
350 ifp->if_flags &= ~IFF_UP;
351
352 #if __FreeBSD_version >= 800000
353 EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach);
354 EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach);
355 #endif
356
357 /* Shutdown and remove lagg ports */
358 while ((lp = SLIST_FIRST(&sc->sc_ports)) != NULL)
359 lagg_port_destroy(lp, 1);
360 /* Unhook the aggregation protocol */
361 (*sc->sc_detach)(sc);
362
363 LAGG_WUNLOCK(sc);
364
365 sysctl_ctx_free(&sc->ctx);
366 ifmedia_removeall(&sc->sc_media);
367 ether_ifdetach(ifp);
368 if_free_type(ifp, IFT_ETHER);
369
370 mtx_lock(&lagg_list_mtx);
371 SLIST_REMOVE(&lagg_list, sc, lagg_softc, sc_entries);
372 mtx_unlock(&lagg_list_mtx);
373
374 taskqueue_drain(taskqueue_swi, &sc->sc_lladdr_task);
375 LAGG_LOCK_DESTROY(sc);
376 free(sc, M_DEVBUF);
377 }
378
379 static void
380 lagg_lladdr(struct lagg_softc *sc, uint8_t *lladdr)
381 {
382 struct ifnet *ifp = sc->sc_ifp;
383
384 if (memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)
385 return;
386
387 bcopy(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN);
388 /* Let the protocol know the MAC has changed */
389 if (sc->sc_lladdr != NULL)
390 (*sc->sc_lladdr)(sc);
391 EVENTHANDLER_INVOKE(iflladdr_event, ifp);
392 }
393
394 static void
395 lagg_capabilities(struct lagg_softc *sc)
396 {
397 struct lagg_port *lp;
398 int cap = ~0, ena = ~0;
399 u_long hwa = ~0UL;
400
401 LAGG_WLOCK_ASSERT(sc);
402
403 /* Get capabilities from the lagg ports */
404 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
405 cap &= lp->lp_ifp->if_capabilities;
406 ena &= lp->lp_ifp->if_capenable;
407 hwa &= lp->lp_ifp->if_hwassist;
408 }
409 cap = (cap == ~0 ? 0 : cap);
410 ena = (ena == ~0 ? 0 : ena);
411 hwa = (hwa == ~0 ? 0 : hwa);
412
413 if (sc->sc_ifp->if_capabilities != cap ||
414 sc->sc_ifp->if_capenable != ena ||
415 sc->sc_ifp->if_hwassist != hwa) {
416 sc->sc_ifp->if_capabilities = cap;
417 sc->sc_ifp->if_capenable = ena;
418 sc->sc_ifp->if_hwassist = hwa;
419 getmicrotime(&sc->sc_ifp->if_lastchange);
420
421 if (sc->sc_ifflags & IFF_DEBUG)
422 if_printf(sc->sc_ifp,
423 "capabilities 0x%08x enabled 0x%08x\n", cap, ena);
424 }
425 }
426
427 static void
428 lagg_port_lladdr(struct lagg_port *lp, uint8_t *lladdr)
429 {
430 struct lagg_softc *sc = lp->lp_softc;
431 struct ifnet *ifp = lp->lp_ifp;
432 struct lagg_llq *llq;
433 int pending = 0;
434
435 LAGG_WLOCK_ASSERT(sc);
436
437 if (lp->lp_detaching ||
438 memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)
439 return;
440
441 /* Check to make sure its not already queued to be changed */
442 SLIST_FOREACH(llq, &sc->sc_llq_head, llq_entries) {
443 if (llq->llq_ifp == ifp) {
444 pending = 1;
445 break;
446 }
447 }
448
449 if (!pending) {
450 llq = malloc(sizeof(struct lagg_llq), M_DEVBUF, M_NOWAIT);
451 if (llq == NULL) /* XXX what to do */
452 return;
453 }
454
455 /* Update the lladdr even if pending, it may have changed */
456 llq->llq_ifp = ifp;
457 bcopy(lladdr, llq->llq_lladdr, ETHER_ADDR_LEN);
458
459 if (!pending)
460 SLIST_INSERT_HEAD(&sc->sc_llq_head, llq, llq_entries);
461
462 taskqueue_enqueue(taskqueue_swi, &sc->sc_lladdr_task);
463 }
464
465 /*
466 * Set the interface MAC address from a taskqueue to avoid a LOR.
467 */
468 static void
469 lagg_port_setlladdr(void *arg, int pending)
470 {
471 struct lagg_softc *sc = (struct lagg_softc *)arg;
472 struct lagg_llq *llq, *head;
473 struct ifnet *ifp;
474 int error;
475
476 /* Grab a local reference of the queue and remove it from the softc */
477 LAGG_WLOCK(sc);
478 head = SLIST_FIRST(&sc->sc_llq_head);
479 SLIST_FIRST(&sc->sc_llq_head) = NULL;
480 LAGG_WUNLOCK(sc);
481
482 /*
483 * Traverse the queue and set the lladdr on each ifp. It is safe to do
484 * unlocked as we have the only reference to it.
485 */
486 for (llq = head; llq != NULL; llq = head) {
487 ifp = llq->llq_ifp;
488
489 /* Set the link layer address */
490 CURVNET_SET(ifp->if_vnet);
491 error = if_setlladdr(ifp, llq->llq_lladdr, ETHER_ADDR_LEN);
492 CURVNET_RESTORE();
493 if (error)
494 printf("%s: setlladdr failed on %s\n", __func__,
495 ifp->if_xname);
496
497 head = SLIST_NEXT(llq, llq_entries);
498 free(llq, M_DEVBUF);
499 }
500 }
501
502 static int
503 lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
504 {
505 struct lagg_softc *sc_ptr;
506 struct lagg_port *lp;
507 int error = 0;
508
509 LAGG_WLOCK_ASSERT(sc);
510
511 /* Limit the maximal number of lagg ports */
512 if (sc->sc_count >= LAGG_MAX_PORTS)
513 return (ENOSPC);
514
515 /* Check if port has already been associated to a lagg */
516 if (ifp->if_lagg != NULL)
517 return (EBUSY);
518
519 /* XXX Disallow non-ethernet interfaces (this should be any of 802) */
520 if (ifp->if_type != IFT_ETHER)
521 return (EPROTONOSUPPORT);
522
523 /* Allow the first Ethernet member to define the MTU */
524 if (SLIST_EMPTY(&sc->sc_ports))
525 sc->sc_ifp->if_mtu = ifp->if_mtu;
526 else if (sc->sc_ifp->if_mtu != ifp->if_mtu) {
527 if_printf(sc->sc_ifp, "invalid MTU for %s\n",
528 ifp->if_xname);
529 return (EINVAL);
530 }
531
532 if ((lp = malloc(sizeof(struct lagg_port),
533 M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
534 return (ENOMEM);
535
536 /* Check if port is a stacked lagg */
537 mtx_lock(&lagg_list_mtx);
538 SLIST_FOREACH(sc_ptr, &lagg_list, sc_entries) {
539 if (ifp == sc_ptr->sc_ifp) {
540 mtx_unlock(&lagg_list_mtx);
541 free(lp, M_DEVBUF);
542 return (EINVAL);
543 /* XXX disable stacking for the moment, its untested
544 lp->lp_flags |= LAGG_PORT_STACK;
545 if (lagg_port_checkstacking(sc_ptr) >=
546 LAGG_MAX_STACKING) {
547 mtx_unlock(&lagg_list_mtx);
548 free(lp, M_DEVBUF);
549 return (E2BIG);
550 }
551 */
552 }
553 }
554 mtx_unlock(&lagg_list_mtx);
555
556 /* Change the interface type */
557 lp->lp_iftype = ifp->if_type;
558 ifp->if_type = IFT_IEEE8023ADLAG;
559 ifp->if_lagg = lp;
560 lp->lp_ioctl = ifp->if_ioctl;
561 ifp->if_ioctl = lagg_port_ioctl;
562 lp->lp_output = ifp->if_output;
563 ifp->if_output = lagg_port_output;
564
565 lp->lp_ifp = ifp;
566 lp->lp_softc = sc;
567
568 /* Save port link layer address */
569 bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ETHER_ADDR_LEN);
570
571 if (SLIST_EMPTY(&sc->sc_ports)) {
572 sc->sc_primary = lp;
573 lagg_lladdr(sc, IF_LLADDR(ifp));
574 } else {
575 /* Update link layer address for this port */
576 lagg_port_lladdr(lp, IF_LLADDR(sc->sc_ifp));
577 }
578
579 /* Insert into the list of ports */
580 SLIST_INSERT_HEAD(&sc->sc_ports, lp, lp_entries);
581 sc->sc_count++;
582
583 /* Update lagg capabilities */
584 lagg_capabilities(sc);
585 lagg_linkstate(sc);
586
587 /* Add multicast addresses and interface flags to this port */
588 lagg_ether_cmdmulti(lp, 1);
589 lagg_setflags(lp, 1);
590
591 if (sc->sc_port_create != NULL)
592 error = (*sc->sc_port_create)(lp);
593 if (error) {
594 /* remove the port again, without calling sc_port_destroy */
595 lagg_port_destroy(lp, 0);
596 return (error);
597 }
598
599 return (error);
600 }
601
602 static int
603 lagg_port_checkstacking(struct lagg_softc *sc)
604 {
605 struct lagg_softc *sc_ptr;
606 struct lagg_port *lp;
607 int m = 0;
608
609 LAGG_WLOCK_ASSERT(sc);
610
611 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
612 if (lp->lp_flags & LAGG_PORT_STACK) {
613 sc_ptr = (struct lagg_softc *)lp->lp_ifp->if_softc;
614 m = MAX(m, lagg_port_checkstacking(sc_ptr));
615 }
616 }
617
618 return (m + 1);
619 }
620
621 static int
622 lagg_port_destroy(struct lagg_port *lp, int runpd)
623 {
624 struct lagg_softc *sc = lp->lp_softc;
625 struct lagg_port *lp_ptr;
626 struct lagg_llq *llq;
627 struct ifnet *ifp = lp->lp_ifp;
628
629 LAGG_WLOCK_ASSERT(sc);
630
631 if (runpd && sc->sc_port_destroy != NULL)
632 (*sc->sc_port_destroy)(lp);
633
634 /*
635 * Remove multicast addresses and interface flags from this port and
636 * reset the MAC address, skip if the interface is being detached.
637 */
638 if (!lp->lp_detaching) {
639 lagg_ether_cmdmulti(lp, 0);
640 lagg_setflags(lp, 0);
641 lagg_port_lladdr(lp, lp->lp_lladdr);
642 }
643
644 /* Restore interface */
645 ifp->if_type = lp->lp_iftype;
646 ifp->if_ioctl = lp->lp_ioctl;
647 ifp->if_output = lp->lp_output;
648 ifp->if_lagg = NULL;
649
650 /* Finally, remove the port from the lagg */
651 SLIST_REMOVE(&sc->sc_ports, lp, lagg_port, lp_entries);
652 sc->sc_count--;
653
654 /* Update the primary interface */
655 if (lp == sc->sc_primary) {
656 uint8_t lladdr[ETHER_ADDR_LEN];
657
658 if ((lp_ptr = SLIST_FIRST(&sc->sc_ports)) == NULL) {
659 bzero(&lladdr, ETHER_ADDR_LEN);
660 } else {
661 bcopy(lp_ptr->lp_lladdr,
662 lladdr, ETHER_ADDR_LEN);
663 }
664 lagg_lladdr(sc, lladdr);
665 sc->sc_primary = lp_ptr;
666
667 /* Update link layer address for each port */
668 SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries)
669 lagg_port_lladdr(lp_ptr, lladdr);
670 }
671
672 /* Remove any pending lladdr changes from the queue */
673 if (lp->lp_detaching) {
674 SLIST_FOREACH(llq, &sc->sc_llq_head, llq_entries) {
675 if (llq->llq_ifp == ifp) {
676 SLIST_REMOVE(&sc->sc_llq_head, llq, lagg_llq,
677 llq_entries);
678 free(llq, M_DEVBUF);
679 break; /* Only appears once */
680 }
681 }
682 }
683
684 if (lp->lp_ifflags)
685 if_printf(ifp, "%s: lp_ifflags unclean\n", __func__);
686
687 free(lp, M_DEVBUF);
688
689 /* Update lagg capabilities */
690 lagg_capabilities(sc);
691 lagg_linkstate(sc);
692
693 return (0);
694 }
695
696 static int
697 lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
698 {
699 struct lagg_reqport *rp = (struct lagg_reqport *)data;
700 struct lagg_softc *sc;
701 struct lagg_port *lp = NULL;
702 int error = 0;
703
704 /* Should be checked by the caller */
705 if (ifp->if_type != IFT_IEEE8023ADLAG ||
706 (lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL)
707 goto fallback;
708
709 switch (cmd) {
710 case SIOCGLAGGPORT:
711 if (rp->rp_portname[0] == '\0' ||
712 ifunit(rp->rp_portname) != ifp) {
713 error = EINVAL;
714 break;
715 }
716
717 LAGG_RLOCK(sc);
718 if ((lp = ifp->if_lagg) == NULL || lp->lp_softc != sc) {
719 error = ENOENT;
720 LAGG_RUNLOCK(sc);
721 break;
722 }
723
724 lagg_port2req(lp, rp);
725 LAGG_RUNLOCK(sc);
726 break;
727
728 case SIOCSIFCAP:
729 if (lp->lp_ioctl == NULL) {
730 error = EINVAL;
731 break;
732 }
733 error = (*lp->lp_ioctl)(ifp, cmd, data);
734 if (error)
735 break;
736
737 /* Update lagg interface capabilities */
738 LAGG_WLOCK(sc);
739 lagg_capabilities(sc);
740 LAGG_WUNLOCK(sc);
741 break;
742
743 case SIOCSIFMTU:
744 /* Do not allow the MTU to be changed once joined */
745 error = EINVAL;
746 break;
747
748 default:
749 goto fallback;
750 }
751
752 return (error);
753
754 fallback:
755 if (lp->lp_ioctl != NULL)
756 return ((*lp->lp_ioctl)(ifp, cmd, data));
757
758 return (EINVAL);
759 }
760
761 /*
762 * For direct output to child ports.
763 */
764 static int
765 lagg_port_output(struct ifnet *ifp, struct mbuf *m,
766 struct sockaddr *dst, struct route *ro)
767 {
768 struct lagg_port *lp = ifp->if_lagg;
769
770 switch (dst->sa_family) {
771 case pseudo_AF_HDRCMPLT:
772 case AF_UNSPEC:
773 return ((*lp->lp_output)(ifp, m, dst, ro));
774 }
775
776 /* drop any other frames */
777 m_freem(m);
778 return (EBUSY);
779 }
780
781 static void
782 lagg_port_ifdetach(void *arg __unused, struct ifnet *ifp)
783 {
784 struct lagg_port *lp;
785 struct lagg_softc *sc;
786
787 if ((lp = ifp->if_lagg) == NULL)
788 return;
789 /* If the ifnet is just being renamed, don't do anything. */
790 if (ifp->if_flags & IFF_RENAMING)
791 return;
792
793 sc = lp->lp_softc;
794
795 LAGG_WLOCK(sc);
796 lp->lp_detaching = 1;
797 lagg_port_destroy(lp, 1);
798 LAGG_WUNLOCK(sc);
799 }
800
801 static void
802 lagg_port2req(struct lagg_port *lp, struct lagg_reqport *rp)
803 {
804 struct lagg_softc *sc = lp->lp_softc;
805
806 strlcpy(rp->rp_ifname, sc->sc_ifname, sizeof(rp->rp_ifname));
807 strlcpy(rp->rp_portname, lp->lp_ifp->if_xname, sizeof(rp->rp_portname));
808 rp->rp_prio = lp->lp_prio;
809 rp->rp_flags = lp->lp_flags;
810 if (sc->sc_portreq != NULL)
811 (*sc->sc_portreq)(lp, (caddr_t)&rp->rp_psc);
812
813 /* Add protocol specific flags */
814 switch (sc->sc_proto) {
815 case LAGG_PROTO_FAILOVER:
816 if (lp == sc->sc_primary)
817 rp->rp_flags |= LAGG_PORT_MASTER;
818 if (lp == lagg_link_active(sc, sc->sc_primary))
819 rp->rp_flags |= LAGG_PORT_ACTIVE;
820 break;
821
822 case LAGG_PROTO_ROUNDROBIN:
823 case LAGG_PROTO_LOADBALANCE:
824 case LAGG_PROTO_ETHERCHANNEL:
825 if (LAGG_PORTACTIVE(lp))
826 rp->rp_flags |= LAGG_PORT_ACTIVE;
827 break;
828
829 case LAGG_PROTO_LACP:
830 /* LACP has a different definition of active */
831 if (lacp_isactive(lp))
832 rp->rp_flags |= LAGG_PORT_ACTIVE;
833 if (lacp_iscollecting(lp))
834 rp->rp_flags |= LAGG_PORT_COLLECTING;
835 if (lacp_isdistributing(lp))
836 rp->rp_flags |= LAGG_PORT_DISTRIBUTING;
837 break;
838 }
839
840 }
841
842 static void
843 lagg_init(void *xsc)
844 {
845 struct lagg_softc *sc = (struct lagg_softc *)xsc;
846 struct lagg_port *lp;
847 struct ifnet *ifp = sc->sc_ifp;
848
849 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
850 return;
851
852 LAGG_WLOCK(sc);
853
854 ifp->if_drv_flags |= IFF_DRV_RUNNING;
855 /* Update the port lladdrs */
856 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
857 lagg_port_lladdr(lp, IF_LLADDR(ifp));
858
859 if (sc->sc_init != NULL)
860 (*sc->sc_init)(sc);
861
862 LAGG_WUNLOCK(sc);
863 }
864
865 static void
866 lagg_stop(struct lagg_softc *sc)
867 {
868 struct ifnet *ifp = sc->sc_ifp;
869
870 LAGG_WLOCK_ASSERT(sc);
871
872 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
873 return;
874
875 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
876
877 if (sc->sc_stop != NULL)
878 (*sc->sc_stop)(sc);
879 }
880
/*
 * ioctl handler for the lagg interface itself.
 *
 * `data' is interpreted according to `cmd' (lagg_reqall, lagg_reqport,
 * lagg_reqflags or ifreq).  The configuration-changing lagg requests
 * (SIOCSLAGG, SIOCSLAGGHASH, SIOCSLAGGPORT, SIOCSLAGGDELPORT) require
 * PRIV_NET_LAGG.  Requests not handled here are passed to ether_ioctl().
 * Returns 0 or an errno value.
 */
881 static int
882 lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
883 {
884 struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
885 struct lagg_reqall *ra = (struct lagg_reqall *)data;
886 struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf;
887 struct lagg_reqflags *rf = (struct lagg_reqflags *)data;
888 struct ifreq *ifr = (struct ifreq *)data;
889 struct lagg_port *lp;
890 struct ifnet *tpif;
891 struct thread *td = curthread;
892 char *buf, *outbuf;
893 int count, buflen, len, error = 0;
894
895 bzero(&rpbuf, sizeof(rpbuf));
896
897 switch (cmd) {
/*
 * Report the protocol and one lagg_reqport per port.  The port count is
 * sampled under the read lock, the lock is dropped for the M_WAITOK
 * allocation and then retaken; the copy loop is bounded by
 * min(ra_size, buflen).
 */
898 case SIOCGLAGG:
899 LAGG_RLOCK(sc);
900 count = 0;
901 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
902 count++;
903 buflen = count * sizeof(struct lagg_reqport);
904 LAGG_RUNLOCK(sc);
905
906 outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
907
908 LAGG_RLOCK(sc);
909 ra->ra_proto = sc->sc_proto;
910 if (sc->sc_req != NULL)
911 (*sc->sc_req)(sc, (caddr_t)&ra->ra_psc);
912
913 count = 0;
914 buf = outbuf;
915 len = min(ra->ra_size, buflen);
916 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
917 if (len < sizeof(rpbuf))
918 break;
919
920 lagg_port2req(lp, &rpbuf);
921 memcpy(buf, &rpbuf, sizeof(rpbuf));
922 count++;
923 buf += sizeof(rpbuf);
924 len -= sizeof(rpbuf);
925 }
926 LAGG_RUNLOCK(sc);
927 ra->ra_ports = count;
928 ra->ra_size = count * sizeof(rpbuf);
929 error = copyout(outbuf, ra->ra_port, ra->ra_size);
930 free(outbuf, M_TEMP);
931 break;
/*
 * Switch aggregation protocol: detach the current one (clearing every
 * protocol hook), then look the requested one up in lagg_protos[] and
 * attach it.  Selecting LAGG_PROTO_NONE attaches nothing.
 */
932 case SIOCSLAGG:
933 error = priv_check(td, PRIV_NET_LAGG);
934 if (error)
935 break;
936 if (ra->ra_proto >= LAGG_PROTO_MAX) {
937 error = EPROTONOSUPPORT;
938 break;
939 }
940 LAGG_WLOCK(sc);
941 if (sc->sc_proto != LAGG_PROTO_NONE) {
942 /* Reset protocol first in case detach unlocks */
943 sc->sc_proto = LAGG_PROTO_NONE;
944 error = sc->sc_detach(sc);
945 sc->sc_detach = NULL;
946 sc->sc_start = NULL;
947 sc->sc_input = NULL;
948 sc->sc_port_create = NULL;
949 sc->sc_port_destroy = NULL;
950 sc->sc_linkstate = NULL;
951 sc->sc_init = NULL;
952 sc->sc_stop = NULL;
953 sc->sc_lladdr = NULL;
954 sc->sc_req = NULL;
955 sc->sc_portreq = NULL;
956 } else if (sc->sc_input != NULL) {
957 /* Still detaching */
958 error = EBUSY;
959 }
960 if (error != 0) {
961 LAGG_WUNLOCK(sc);
962 break;
963 }
/* Note: a matching table entry returns directly from inside the loop. */
964 for (int i = 0; i < (sizeof(lagg_protos) /
965 sizeof(lagg_protos[0])); i++) {
966 if (lagg_protos[i].ti_proto == ra->ra_proto) {
967 if (sc->sc_ifflags & IFF_DEBUG)
968 printf("%s: using proto %u\n",
969 sc->sc_ifname,
970 lagg_protos[i].ti_proto);
971 sc->sc_proto = lagg_protos[i].ti_proto;
972 if (sc->sc_proto != LAGG_PROTO_NONE)
973 error = lagg_protos[i].ti_attach(sc);
974 LAGG_WUNLOCK(sc);
975 return (error);
976 }
977 }
978 LAGG_WUNLOCK(sc);
979 error = EPROTONOSUPPORT;
980 break;
/* Report the softc flags; read without taking the lagg lock. */
981 case SIOCGLAGGFLAGS:
982 rf->rf_flags = sc->sc_flags;
983 break;
/* Replace the hash-layer bits; at least one hash bit must be requested. */
984 case SIOCSLAGGHASH:
985 error = priv_check(td, PRIV_NET_LAGG);
986 if (error)
987 break;
988 if ((rf->rf_flags & LAGG_F_HASHMASK) == 0) {
989 error = EINVAL;
990 break;
991 }
992 LAGG_WLOCK(sc);
993 sc->sc_flags &= ~LAGG_F_HASHMASK;
994 sc->sc_flags |= rf->rf_flags & LAGG_F_HASHMASK;
995 LAGG_WUNLOCK(sc);
996 break;
/* Describe one named port; ENOENT if it is not a member of this lagg. */
997 case SIOCGLAGGPORT:
998 if (rp->rp_portname[0] == '\0' ||
999 (tpif = ifunit(rp->rp_portname)) == NULL) {
1000 error = EINVAL;
1001 break;
1002 }
1003
1004 LAGG_RLOCK(sc);
1005 if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
1006 lp->lp_softc != sc) {
1007 error = ENOENT;
1008 LAGG_RUNLOCK(sc);
1009 break;
1010 }
1011
1012 lagg_port2req(lp, rp);
1013 LAGG_RUNLOCK(sc);
1014 break;
/* Add the named interface as a member port. */
1015 case SIOCSLAGGPORT:
1016 error = priv_check(td, PRIV_NET_LAGG);
1017 if (error)
1018 break;
1019 if (rp->rp_portname[0] == '\0' ||
1020 (tpif = ifunit(rp->rp_portname)) == NULL) {
1021 error = EINVAL;
1022 break;
1023 }
1024 LAGG_WLOCK(sc);
1025 error = lagg_port_create(sc, tpif);
1026 LAGG_WUNLOCK(sc);
1027 break;
/* Remove the named member port; ENOENT if it is not a member of this lagg. */
1028 case SIOCSLAGGDELPORT:
1029 error = priv_check(td, PRIV_NET_LAGG);
1030 if (error)
1031 break;
1032 if (rp->rp_portname[0] == '\0' ||
1033 (tpif = ifunit(rp->rp_portname)) == NULL) {
1034 error = EINVAL;
1035 break;
1036 }
1037
1038 LAGG_WLOCK(sc);
1039 if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
1040 lp->lp_softc != sc) {
1041 error = ENOENT;
1042 LAGG_WUNLOCK(sc);
1043 break;
1044 }
1045
1046 error = lagg_port_destroy(lp, 1);
1047 LAGG_WUNLOCK(sc);
1048 break;
/*
 * Propagate the interface flags to every port, then stop or start the lagg
 * so that IFF_DRV_RUNNING follows IFF_UP.
 */
1049 case SIOCSIFFLAGS:
1050 /* Set flags on ports too */
1051 LAGG_WLOCK(sc);
1052 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
1053 lagg_setflags(lp, 1);
1054 }
1055 LAGG_WUNLOCK(sc);
1056
1057 if (!(ifp->if_flags & IFF_UP) &&
1058 (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1059 /*
1060 * If interface is marked down and it is running,
1061 * then stop and disable it.
1062 */
1063 LAGG_WLOCK(sc);
1064 lagg_stop(sc);
1065 LAGG_WUNLOCK(sc);
1066 } else if ((ifp->if_flags & IFF_UP) &&
1067 !(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1068 /*
1069 * If interface is marked up and it is stopped, then
1070 * start it.
1071 */
1072 (*ifp->if_init)(sc);
1073 }
1074 break;
/* Resynchronise every port's multicast filter with the lagg's list. */
1075 case SIOCADDMULTI:
1076 case SIOCDELMULTI:
1077 LAGG_WLOCK(sc);
1078 error = lagg_ether_setmulti(sc);
1079 LAGG_WUNLOCK(sc);
1080 break;
1081 case SIOCSIFMEDIA:
1082 case SIOCGIFMEDIA:
1083 error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
1084 break;
1085
1086 case SIOCSIFCAP:
1087 case SIOCSIFMTU:
1088 /* Do not allow the MTU or caps to be directly changed */
1089 error = EINVAL;
1090 break;
1091
1092 default:
1093 error = ether_ioctl(ifp, cmd, data);
1094 break;
1095 }
1096 return (error);
1097 }
1098
1099 static int
1100 lagg_ether_setmulti(struct lagg_softc *sc)
1101 {
1102 struct lagg_port *lp;
1103
1104 LAGG_WLOCK_ASSERT(sc);
1105
1106 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
1107 /* First, remove any existing filter entries. */
1108 lagg_ether_cmdmulti(lp, 0);
1109 /* copy all addresses from the lagg interface to the port */
1110 lagg_ether_cmdmulti(lp, 1);
1111 }
1112 return (0);
1113 }
1114
1115 static int
1116 lagg_ether_cmdmulti(struct lagg_port *lp, int set)
1117 {
1118 struct lagg_softc *sc = lp->lp_softc;
1119 struct ifnet *ifp = lp->lp_ifp;
1120 struct ifnet *scifp = sc->sc_ifp;
1121 struct lagg_mc *mc;
1122 struct ifmultiaddr *ifma, *rifma = NULL;
1123 struct sockaddr_dl sdl;
1124 int error;
1125
1126 LAGG_WLOCK_ASSERT(sc);
1127
1128 bzero((char *)&sdl, sizeof(sdl));
1129 sdl.sdl_len = sizeof(sdl);
1130 sdl.sdl_family = AF_LINK;
1131 sdl.sdl_type = IFT_ETHER;
1132 sdl.sdl_alen = ETHER_ADDR_LEN;
1133 sdl.sdl_index = ifp->if_index;
1134
1135 if (set) {
1136 TAILQ_FOREACH(ifma, &scifp->if_multiaddrs, ifma_link) {
1137 if (ifma->ifma_addr->sa_family != AF_LINK)
1138 continue;
1139 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1140 LLADDR(&sdl), ETHER_ADDR_LEN);
1141
1142 error = if_addmulti(ifp, (struct sockaddr *)&sdl, &rifma);
1143 if (error)
1144 return (error);
1145 mc = malloc(sizeof(struct lagg_mc), M_DEVBUF, M_NOWAIT);
1146 if (mc == NULL)
1147 return (ENOMEM);
1148 mc->mc_ifma = rifma;
1149 SLIST_INSERT_HEAD(&lp->lp_mc_head, mc, mc_entries);
1150 }
1151 } else {
1152 while ((mc = SLIST_FIRST(&lp->lp_mc_head)) != NULL) {
1153 SLIST_REMOVE(&lp->lp_mc_head, mc, lagg_mc, mc_entries);
1154 if_delmulti_ifma(mc->mc_ifma);
1155 free(mc, M_DEVBUF);
1156 }
1157 }
1158 return (0);
1159 }
1160
1161 /* Handle a ref counted flag that should be set on the lagg port as well */
1162 static int
1163 lagg_setflag(struct lagg_port *lp, int flag, int status,
1164 int (*func)(struct ifnet *, int))
1165 {
1166 struct lagg_softc *sc = lp->lp_softc;
1167 struct ifnet *scifp = sc->sc_ifp;
1168 struct ifnet *ifp = lp->lp_ifp;
1169 int error;
1170
1171 LAGG_WLOCK_ASSERT(sc);
1172
1173 status = status ? (scifp->if_flags & flag) : 0;
1174 /* Now "status" contains the flag value or 0 */
1175
1176 /*
1177 * See if recorded ports status is different from what
1178 * we want it to be. If it is, flip it. We record ports
1179 * status in lp_ifflags so that we won't clear ports flag
1180 * we haven't set. In fact, we don't clear or set ports
1181 * flags directly, but get or release references to them.
1182 * That's why we can be sure that recorded flags still are
1183 * in accord with actual ports flags.
1184 */
1185 if (status != (lp->lp_ifflags & flag)) {
1186 error = (*func)(ifp, status);
1187 if (error)
1188 return (error);
1189 lp->lp_ifflags &= ~flag;
1190 lp->lp_ifflags |= status;
1191 }
1192 return (0);
1193 }
1194
1195 /*
1196 * Handle IFF_* flags that require certain changes on the lagg port
1197 * if "status" is true, update ports flags respective to the lagg
1198 * if "status" is false, forcedly clear the flags set on port.
1199 */
1200 static int
1201 lagg_setflags(struct lagg_port *lp, int status)
1202 {
1203 int error, i;
1204
1205 for (i = 0; lagg_pflags[i].flag; i++) {
1206 error = lagg_setflag(lp, lagg_pflags[i].flag,
1207 status, lagg_pflags[i].func);
1208 if (error)
1209 return (error);
1210 }
1211 return (0);
1212 }
1213
1214 static void
1215 lagg_start(struct ifnet *ifp)
1216 {
1217 struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
1218 struct mbuf *m;
1219 int error = 0;
1220
1221 LAGG_RLOCK(sc);
1222 /* We need a Tx algorithm and at least one port */
1223 if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
1224 IF_DRAIN(&ifp->if_snd);
1225 LAGG_RUNLOCK(sc);
1226 return;
1227 }
1228
1229 for (;; error = 0) {
1230 IFQ_DEQUEUE(&ifp->if_snd, m);
1231 if (m == NULL)
1232 break;
1233
1234 ETHER_BPF_MTAP(ifp, m);
1235
1236 error = (*sc->sc_start)(sc, m);
1237 if (error == 0)
1238 ifp->if_opackets++;
1239 else
1240 ifp->if_oerrors++;
1241 }
1242 LAGG_RUNLOCK(sc);
1243 }
1244
1245 static struct mbuf *
1246 lagg_input(struct ifnet *ifp, struct mbuf *m)
1247 {
1248 struct lagg_port *lp = ifp->if_lagg;
1249 struct lagg_softc *sc = lp->lp_softc;
1250 struct ifnet *scifp = sc->sc_ifp;
1251
1252 LAGG_RLOCK(sc);
1253 if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
1254 (lp->lp_flags & LAGG_PORT_DISABLED) ||
1255 sc->sc_proto == LAGG_PROTO_NONE) {
1256 LAGG_RUNLOCK(sc);
1257 m_freem(m);
1258 return (NULL);
1259 }
1260
1261 ETHER_BPF_MTAP(scifp, m);
1262
1263 m = (*sc->sc_input)(sc, lp, m);
1264
1265 if (m != NULL) {
1266 scifp->if_ipackets++;
1267 scifp->if_ibytes += m->m_pkthdr.len;
1268
1269 if (scifp->if_flags & IFF_MONITOR) {
1270 m_freem(m);
1271 m = NULL;
1272 }
1273 }
1274
1275 LAGG_RUNLOCK(sc);
1276 return (m);
1277 }
1278
1279 static int
1280 lagg_media_change(struct ifnet *ifp)
1281 {
1282 struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
1283
1284 if (sc->sc_ifflags & IFF_DEBUG)
1285 printf("%s\n", __func__);
1286
1287 /* Ignore */
1288 return (0);
1289 }
1290
1291 static void
1292 lagg_media_status(struct ifnet *ifp, struct ifmediareq *imr)
1293 {
1294 struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
1295 struct lagg_port *lp;
1296
1297 imr->ifm_status = IFM_AVALID;
1298 imr->ifm_active = IFM_ETHER | IFM_AUTO;
1299
1300 LAGG_RLOCK(sc);
1301 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
1302 if (LAGG_PORTACTIVE(lp))
1303 imr->ifm_status |= IFM_ACTIVE;
1304 }
1305 LAGG_RUNLOCK(sc);
1306 }
1307
1308 static void
1309 lagg_linkstate(struct lagg_softc *sc)
1310 {
1311 struct lagg_port *lp;
1312 int new_link = LINK_STATE_DOWN;
1313 uint64_t speed;
1314
1315 /* Our link is considered up if at least one of our ports is active */
1316 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
1317 if (lp->lp_link_state == LINK_STATE_UP) {
1318 new_link = LINK_STATE_UP;
1319 break;
1320 }
1321 }
1322 if_link_state_change(sc->sc_ifp, new_link);
1323
1324 /* Update if_baudrate to reflect the max possible speed */
1325 switch (sc->sc_proto) {
1326 case LAGG_PROTO_FAILOVER:
1327 sc->sc_ifp->if_baudrate = sc->sc_primary != NULL ?
1328 sc->sc_primary->lp_ifp->if_baudrate : 0;
1329 break;
1330 case LAGG_PROTO_ROUNDROBIN:
1331 case LAGG_PROTO_LOADBALANCE:
1332 case LAGG_PROTO_ETHERCHANNEL:
1333 speed = 0;
1334 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
1335 speed += lp->lp_ifp->if_baudrate;
1336 sc->sc_ifp->if_baudrate = speed;
1337 break;
1338 case LAGG_PROTO_LACP:
1339 /* LACP updates if_baudrate itself */
1340 break;
1341 }
1342 }
1343
1344 static void
1345 lagg_port_state(struct ifnet *ifp, int state)
1346 {
1347 struct lagg_port *lp = (struct lagg_port *)ifp->if_lagg;
1348 struct lagg_softc *sc = NULL;
1349
1350 if (lp != NULL)
1351 sc = lp->lp_softc;
1352 if (sc == NULL)
1353 return;
1354
1355 LAGG_WLOCK(sc);
1356 lagg_linkstate(sc);
1357 if (sc->sc_linkstate != NULL)
1358 (*sc->sc_linkstate)(lp);
1359 LAGG_WUNLOCK(sc);
1360 }
1361
1362 struct lagg_port *
1363 lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp)
1364 {
1365 struct lagg_port *lp_next, *rval = NULL;
1366 // int new_link = LINK_STATE_DOWN;
1367
1368 LAGG_RLOCK_ASSERT(sc);
1369 /*
1370 * Search a port which reports an active link state.
1371 */
1372
1373 if (lp == NULL)
1374 goto search;
1375 if (LAGG_PORTACTIVE(lp)) {
1376 rval = lp;
1377 goto found;
1378 }
1379 if ((lp_next = SLIST_NEXT(lp, lp_entries)) != NULL &&
1380 LAGG_PORTACTIVE(lp_next)) {
1381 rval = lp_next;
1382 goto found;
1383 }
1384
1385 search:
1386 SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
1387 if (LAGG_PORTACTIVE(lp_next)) {
1388 rval = lp_next;
1389 goto found;
1390 }
1391 }
1392
1393 found:
1394 if (rval != NULL) {
1395 /*
1396 * The IEEE 802.1D standard assumes that a lagg with
1397 * multiple ports is always full duplex. This is valid
1398 * for load sharing laggs and if at least two links
1399 * are active. Unfortunately, checking the latter would
1400 * be too expensive at this point.
1401 XXX
1402 if ((sc->sc_capabilities & IFCAP_LAGG_FULLDUPLEX) &&
1403 (sc->sc_count > 1))
1404 new_link = LINK_STATE_FULL_DUPLEX;
1405 else
1406 new_link = rval->lp_link_state;
1407 */
1408 }
1409
1410 return (rval);
1411 }
1412
1413 static const void *
1414 lagg_gethdr(struct mbuf *m, u_int off, u_int len, void *buf)
1415 {
1416 if (m->m_pkthdr.len < (off + len)) {
1417 return (NULL);
1418 } else if (m->m_len < (off + len)) {
1419 m_copydata(m, off, len, buf);
1420 return (buf);
1421 }
1422 return (mtod(m, char *) + off);
1423 }
1424
1425 uint32_t
1426 lagg_hashmbuf(struct lagg_softc *sc, struct mbuf *m, uint32_t key)
1427 {
1428 uint16_t etype;
1429 uint32_t p = key;
1430 int off;
1431 struct ether_header *eh;
1432 const struct ether_vlan_header *vlan;
1433 #ifdef INET
1434 const struct ip *ip;
1435 const uint32_t *ports;
1436 int iphlen;
1437 #endif
1438 #ifdef INET6
1439 const struct ip6_hdr *ip6;
1440 uint32_t flow;
1441 #endif
1442 union {
1443 #ifdef INET
1444 struct ip ip;
1445 #endif
1446 #ifdef INET6
1447 struct ip6_hdr ip6;
1448 #endif
1449 struct ether_vlan_header vlan;
1450 uint32_t port;
1451 } buf;
1452
1453
1454 off = sizeof(*eh);
1455 if (m->m_len < off)
1456 goto out;
1457 eh = mtod(m, struct ether_header *);
1458 etype = ntohs(eh->ether_type);
1459 if (sc->sc_flags & LAGG_F_HASHL2) {
1460 p = hash32_buf(&eh->ether_shost, ETHER_ADDR_LEN, p);
1461 p = hash32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);
1462 }
1463
1464 /* Special handling for encapsulating VLAN frames */
1465 if ((m->m_flags & M_VLANTAG) && (sc->sc_flags & LAGG_F_HASHL2)) {
1466 p = hash32_buf(&m->m_pkthdr.ether_vtag,
1467 sizeof(m->m_pkthdr.ether_vtag), p);
1468 } else if (etype == ETHERTYPE_VLAN) {
1469 vlan = lagg_gethdr(m, off, sizeof(*vlan), &buf);
1470 if (vlan == NULL)
1471 goto out;
1472
1473 if (sc->sc_flags & LAGG_F_HASHL2)
1474 p = hash32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p);
1475 etype = ntohs(vlan->evl_proto);
1476 off += sizeof(*vlan) - sizeof(*eh);
1477 }
1478
1479 switch (etype) {
1480 #ifdef INET
1481 case ETHERTYPE_IP:
1482 ip = lagg_gethdr(m, off, sizeof(*ip), &buf);
1483 if (ip == NULL)
1484 goto out;
1485
1486 if (sc->sc_flags & LAGG_F_HASHL3) {
1487 p = hash32_buf(&ip->ip_src, sizeof(struct in_addr), p);
1488 p = hash32_buf(&ip->ip_dst, sizeof(struct in_addr), p);
1489 }
1490 if (!(sc->sc_flags & LAGG_F_HASHL4))
1491 break;
1492 switch (ip->ip_p) {
1493 case IPPROTO_TCP:
1494 case IPPROTO_UDP:
1495 case IPPROTO_SCTP:
1496 iphlen = ip->ip_hl << 2;
1497 if (iphlen < sizeof(*ip))
1498 break;
1499 off += iphlen;
1500 ports = lagg_gethdr(m, off, sizeof(*ports), &buf);
1501 if (ports == NULL)
1502 break;
1503 p = hash32_buf(ports, sizeof(*ports), p);
1504 break;
1505 }
1506 break;
1507 #endif
1508 #ifdef INET6
1509 case ETHERTYPE_IPV6:
1510 if (!(sc->sc_flags & LAGG_F_HASHL3))
1511 break;
1512 ip6 = lagg_gethdr(m, off, sizeof(*ip6), &buf);
1513 if (ip6 == NULL)
1514 goto out;
1515
1516 p = hash32_buf(&ip6->ip6_src, sizeof(struct in6_addr), p);
1517 p = hash32_buf(&ip6->ip6_dst, sizeof(struct in6_addr), p);
1518 flow = ip6->ip6_flow & IPV6_FLOWLABEL_MASK;
1519 p = hash32_buf(&flow, sizeof(flow), p); /* IPv6 flow label */
1520 break;
1521 #endif
1522 }
1523 out:
1524 return (p);
1525 }
1526
1527 int
1528 lagg_enqueue(struct ifnet *ifp, struct mbuf *m)
1529 {
1530
1531 return (ifp->if_transmit)(ifp, m);
1532 }
1533
1534 /*
1535 * Simple round robin aggregation
1536 */
1537
1538 static int
1539 lagg_rr_attach(struct lagg_softc *sc)
1540 {
1541 sc->sc_detach = lagg_rr_detach;
1542 sc->sc_start = lagg_rr_start;
1543 sc->sc_input = lagg_rr_input;
1544 sc->sc_port_create = NULL;
1545 sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX;
1546 sc->sc_seq = 0;
1547
1548 return (0);
1549 }
1550
/*
 * Detach the round robin protocol.  lagg_rr_attach() allocates nothing,
 * so there is nothing to release.  Always returns 0.
 */
static int
lagg_rr_detach(struct lagg_softc *sc)
{
	(void)sc;

	return (0);
}
1556
1557 static int
1558 lagg_rr_start(struct lagg_softc *sc, struct mbuf *m)
1559 {
1560 struct lagg_port *lp;
1561 uint32_t p;
1562
1563 p = atomic_fetchadd_32(&sc->sc_seq, 1);
1564 p %= sc->sc_count;
1565 lp = SLIST_FIRST(&sc->sc_ports);
1566 while (p--)
1567 lp = SLIST_NEXT(lp, lp_entries);
1568
1569 /*
1570 * Check the port's link state. This will return the next active
1571 * port if the link is down or the port is NULL.
1572 */
1573 if ((lp = lagg_link_active(sc, lp)) == NULL) {
1574 m_freem(m);
1575 return (ENETDOWN);
1576 }
1577
1578 /* Send mbuf */
1579 return (lagg_enqueue(lp->lp_ifp, m));
1580 }
1581
1582 static struct mbuf *
1583 lagg_rr_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
1584 {
1585 struct ifnet *ifp = sc->sc_ifp;
1586
1587 /* Just pass in the packet to our lagg device */
1588 m->m_pkthdr.rcvif = ifp;
1589
1590 return (m);
1591 }
1592
1593 /*
1594 * Active failover
1595 */
1596
1597 static int
1598 lagg_fail_attach(struct lagg_softc *sc)
1599 {
1600 sc->sc_detach = lagg_fail_detach;
1601 sc->sc_start = lagg_fail_start;
1602 sc->sc_input = lagg_fail_input;
1603 sc->sc_port_create = NULL;
1604 sc->sc_port_destroy = NULL;
1605
1606 return (0);
1607 }
1608
/*
 * Detach the failover protocol.  lagg_fail_attach() allocates nothing,
 * so there is nothing to release.  Always returns 0.
 */
static int
lagg_fail_detach(struct lagg_softc *sc)
{
	(void)sc;

	return (0);
}
1614
1615 static int
1616 lagg_fail_start(struct lagg_softc *sc, struct mbuf *m)
1617 {
1618 struct lagg_port *lp;
1619
1620 /* Use the master port if active or the next available port */
1621 if ((lp = lagg_link_active(sc, sc->sc_primary)) == NULL) {
1622 m_freem(m);
1623 return (ENETDOWN);
1624 }
1625
1626 /* Send mbuf */
1627 return (lagg_enqueue(lp->lp_ifp, m));
1628 }
1629
1630 static struct mbuf *
1631 lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
1632 {
1633 struct ifnet *ifp = sc->sc_ifp;
1634 struct lagg_port *tmp_tp;
1635
1636 if (lp == sc->sc_primary || lagg_failover_rx_all) {
1637 m->m_pkthdr.rcvif = ifp;
1638 return (m);
1639 }
1640
1641 if (!LAGG_PORTACTIVE(sc->sc_primary)) {
1642 tmp_tp = lagg_link_active(sc, sc->sc_primary);
1643 /*
1644 * If tmp_tp is null, we've recieved a packet when all
1645 * our links are down. Weird, but process it anyways.
1646 */
1647 if ((tmp_tp == NULL || tmp_tp == lp)) {
1648 m->m_pkthdr.rcvif = ifp;
1649 return (m);
1650 }
1651 }
1652
1653 m_freem(m);
1654 return (NULL);
1655 }
1656
1657 /*
1658 * Loadbalancing
1659 */
1660
1661 static int
1662 lagg_lb_attach(struct lagg_softc *sc)
1663 {
1664 struct lagg_port *lp;
1665 struct lagg_lb *lb;
1666
1667 if ((lb = (struct lagg_lb *)malloc(sizeof(struct lagg_lb),
1668 M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
1669 return (ENOMEM);
1670
1671 sc->sc_detach = lagg_lb_detach;
1672 sc->sc_start = lagg_lb_start;
1673 sc->sc_input = lagg_lb_input;
1674 sc->sc_port_create = lagg_lb_port_create;
1675 sc->sc_port_destroy = lagg_lb_port_destroy;
1676 sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX;
1677
1678 lb->lb_key = arc4random();
1679 sc->sc_psc = (caddr_t)lb;
1680
1681 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
1682 lagg_lb_port_create(lp);
1683
1684 return (0);
1685 }
1686
1687 static int
1688 lagg_lb_detach(struct lagg_softc *sc)
1689 {
1690 struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
1691 if (lb != NULL)
1692 free(lb, M_DEVBUF);
1693 return (0);
1694 }
1695
1696 static int
1697 lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp)
1698 {
1699 struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
1700 struct lagg_port *lp_next;
1701 int i = 0;
1702
1703 bzero(&lb->lb_ports, sizeof(lb->lb_ports));
1704 SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
1705 if (lp_next == lp)
1706 continue;
1707 if (i >= LAGG_MAX_PORTS)
1708 return (EINVAL);
1709 if (sc->sc_ifflags & IFF_DEBUG)
1710 printf("%s: port %s at index %d\n",
1711 sc->sc_ifname, lp_next->lp_ifname, i);
1712 lb->lb_ports[i++] = lp_next;
1713 }
1714
1715 return (0);
1716 }
1717
1718 static int
1719 lagg_lb_port_create(struct lagg_port *lp)
1720 {
1721 struct lagg_softc *sc = lp->lp_softc;
1722 return (lagg_lb_porttable(sc, NULL));
1723 }
1724
1725 static void
1726 lagg_lb_port_destroy(struct lagg_port *lp)
1727 {
1728 struct lagg_softc *sc = lp->lp_softc;
1729 lagg_lb_porttable(sc, lp);
1730 }
1731
1732 static int
1733 lagg_lb_start(struct lagg_softc *sc, struct mbuf *m)
1734 {
1735 struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
1736 struct lagg_port *lp = NULL;
1737 uint32_t p = 0;
1738
1739 if (sc->use_flowid && (m->m_flags & M_FLOWID))
1740 p = m->m_pkthdr.flowid;
1741 else
1742 p = lagg_hashmbuf(sc, m, lb->lb_key);
1743 p %= sc->sc_count;
1744 lp = lb->lb_ports[p];
1745
1746 /*
1747 * Check the port's link state. This will return the next active
1748 * port if the link is down or the port is NULL.
1749 */
1750 if ((lp = lagg_link_active(sc, lp)) == NULL) {
1751 m_freem(m);
1752 return (ENETDOWN);
1753 }
1754
1755 /* Send mbuf */
1756 return (lagg_enqueue(lp->lp_ifp, m));
1757 }
1758
1759 static struct mbuf *
1760 lagg_lb_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
1761 {
1762 struct ifnet *ifp = sc->sc_ifp;
1763
1764 /* Just pass in the packet to our lagg device */
1765 m->m_pkthdr.rcvif = ifp;
1766
1767 return (m);
1768 }
1769
1770 /*
1771 * 802.3ad LACP
1772 */
1773
1774 static int
1775 lagg_lacp_attach(struct lagg_softc *sc)
1776 {
1777 struct lagg_port *lp;
1778 int error;
1779
1780 sc->sc_detach = lagg_lacp_detach;
1781 sc->sc_port_create = lacp_port_create;
1782 sc->sc_port_destroy = lacp_port_destroy;
1783 sc->sc_linkstate = lacp_linkstate;
1784 sc->sc_start = lagg_lacp_start;
1785 sc->sc_input = lagg_lacp_input;
1786 sc->sc_init = lacp_init;
1787 sc->sc_stop = lacp_stop;
1788 sc->sc_lladdr = lagg_lacp_lladdr;
1789 sc->sc_req = lacp_req;
1790 sc->sc_portreq = lacp_portreq;
1791
1792 error = lacp_attach(sc);
1793 if (error)
1794 return (error);
1795
1796 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
1797 lacp_port_create(lp);
1798
1799 return (error);
1800 }
1801
1802 static int
1803 lagg_lacp_detach(struct lagg_softc *sc)
1804 {
1805 struct lagg_port *lp;
1806 int error;
1807
1808 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
1809 lacp_port_destroy(lp);
1810
1811 /* unlocking is safe here */
1812 LAGG_WUNLOCK(sc);
1813 error = lacp_detach(sc);
1814 LAGG_WLOCK(sc);
1815
1816 return (error);
1817 }
1818
1819 static void
1820 lagg_lacp_lladdr(struct lagg_softc *sc)
1821 {
1822 struct lagg_port *lp;
1823
1824 /* purge all the lacp ports */
1825 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
1826 lacp_port_destroy(lp);
1827
1828 /* add them back in */
1829 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
1830 lacp_port_create(lp);
1831 }
1832
1833 static int
1834 lagg_lacp_start(struct lagg_softc *sc, struct mbuf *m)
1835 {
1836 struct lagg_port *lp;
1837
1838 lp = lacp_select_tx_port(sc, m);
1839 if (lp == NULL) {
1840 m_freem(m);
1841 return (EBUSY);
1842 }
1843
1844 /* Send mbuf */
1845 return (lagg_enqueue(lp->lp_ifp, m));
1846 }
1847
1848 static struct mbuf *
1849 lagg_lacp_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
1850 {
1851 struct ifnet *ifp = sc->sc_ifp;
1852 struct ether_header *eh;
1853 u_short etype;
1854
1855 eh = mtod(m, struct ether_header *);
1856 etype = ntohs(eh->ether_type);
1857
1858 /* Tap off LACP control messages */
1859 if ((m->m_flags & M_VLANTAG) == 0 && etype == ETHERTYPE_SLOW) {
1860 m = lacp_input(lp, m);
1861 if (m == NULL)
1862 return (NULL);
1863 }
1864
1865 /*
1866 * If the port is not collecting or not in the active aggregator then
1867 * free and return.
1868 */
1869 if (lacp_iscollecting(lp) == 0 || lacp_isactive(lp) == 0) {
1870 m_freem(m);
1871 return (NULL);
1872 }
1873
1874 m->m_pkthdr.rcvif = ifp;
1875 return (m);
1876 }
Cache object: 76f66e6ec8c456b511b224b584af1462
|