[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ]

FreeBSD/Linux Kernel Cross Reference
sys/net/ieee8023ad_lacp.c

Version: -  FREEBSD  -  FREEBSD7  -  FREEBSD70  -  FREEBSD6  -  FREEBSD63  -  FREEBSD62  -  FREEBSD61  -  FREEBSD60  -  FREEBSD5  -  FREEBSD55  -  FREEBSD54  -  FREEBSD53  -  FREEBSD52  -  FREEBSD51  -  FREEBSD50  -  FREEBSD4  -  FREEBSD3  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  OPENSOLARIS  -  minix-3-1-1  -  TRUSTEDBSD-SEBSD  -  FREEBSD-LIBC  -  FREEBSD7-LIBC  -  FREEBSD6-LIBC  -  GLIBC27 
SearchContext: -  none  -  excerpts  -  bigexcerpts 

  1 /*      $NetBSD: ieee8023ad_lacp.c,v 1.3 2005/12/11 12:24:54 christos Exp $     */
  2 
  3 /*-
  4  * Copyright (c)2005 YAMAMOTO Takashi,
  5  * Copyright (c)2008 Andrew Thompson <thompsa@FreeBSD.org>
  6  * All rights reserved.
  7  *
  8  * Redistribution and use in source and binary forms, with or without
  9  * modification, are permitted provided that the following conditions
 10  * are met:
 11  * 1. Redistributions of source code must retain the above copyright
 12  *    notice, this list of conditions and the following disclaimer.
 13  * 2. Redistributions in binary form must reproduce the above copyright
 14  *    notice, this list of conditions and the following disclaimer in the
 15  *    documentation and/or other materials provided with the distribution.
 16  *
 17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 27  * SUCH DAMAGE.
 28  */
 29 
 30 #include <sys/cdefs.h>
 31 __FBSDID("$FreeBSD: src/sys/net/ieee8023ad_lacp.c,v 1.15 2008/03/16 19:25:30 thompsa Exp $");
 32 
 33 #include <sys/param.h>
 34 #include <sys/callout.h>
 35 #include <sys/mbuf.h>
 36 #include <sys/systm.h>
 37 #include <sys/malloc.h>
 38 #include <sys/kernel.h> /* hz */
 39 #include <sys/socket.h> /* for net/if.h */
 40 #include <sys/sockio.h>
 41 #include <machine/stdarg.h>
 42 #include <sys/lock.h>
 43 #include <sys/rwlock.h>
 44 
 45 #include <net/if.h>
 46 #include <net/if_dl.h>
 47 #include <net/ethernet.h>
 48 #include <net/if_media.h>
 49 #include <net/if_types.h>
 50 
 51 #include <net/if_lagg.h>
 52 #include <net/ieee8023ad_lacp.h>
 53 
 54 /*
 55  * actor system priority and port priority.
 56  * XXX should be configurable.
 57  */
 58 
 59 #define LACP_SYSTEM_PRIO        0x8000
 60 #define LACP_PORT_PRIO          0x8000
 61 
 62 const uint8_t ethermulticastaddr_slowprotocols[ETHER_ADDR_LEN] =
 63     { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x02 };
 64 
 65 static const struct tlv_template lacp_info_tlv_template[] = {
 66         { LACP_TYPE_ACTORINFO,
 67             sizeof(struct tlvhdr) + sizeof(struct lacp_peerinfo) },
 68         { LACP_TYPE_PARTNERINFO,
 69             sizeof(struct tlvhdr) + sizeof(struct lacp_peerinfo) },
 70         { LACP_TYPE_COLLECTORINFO,
 71             sizeof(struct tlvhdr) + sizeof(struct lacp_collectorinfo) },
 72         { 0, 0 },
 73 };
 74 
 75 static const struct tlv_template marker_info_tlv_template[] = {
 76         { MARKER_TYPE_INFO,
 77             sizeof(struct tlvhdr) + sizeof(struct lacp_markerinfo) },
 78         { 0, 0 },
 79 };
 80 
 81 static const struct tlv_template marker_response_tlv_template[] = {
 82         { MARKER_TYPE_RESPONSE,
 83             sizeof(struct tlvhdr) + sizeof(struct lacp_markerinfo) },
 84         { 0, 0 },
 85 };
 86 
 87 typedef void (*lacp_timer_func_t)(struct lacp_port *);
 88 
 89 static void     lacp_fill_actorinfo(struct lacp_port *, struct lacp_peerinfo *);
 90 static void     lacp_fill_markerinfo(struct lacp_port *,
 91                     struct lacp_markerinfo *);
 92 
 93 static uint64_t lacp_aggregator_bandwidth(struct lacp_aggregator *);
 94 static void     lacp_suppress_distributing(struct lacp_softc *,
 95                     struct lacp_aggregator *);
 96 static void     lacp_transit_expire(void *);
 97 static void     lacp_update_portmap(struct lacp_softc *);
 98 static void     lacp_select_active_aggregator(struct lacp_softc *);
 99 static uint16_t lacp_compose_key(struct lacp_port *);
100 static int      tlv_check(const void *, size_t, const struct tlvhdr *,
101                     const struct tlv_template *, boolean_t);
102 static void     lacp_tick(void *);
103 
104 static void     lacp_fill_aggregator_id(struct lacp_aggregator *,
105                     const struct lacp_port *);
106 static void     lacp_fill_aggregator_id_peer(struct lacp_peerinfo *,
107                     const struct lacp_peerinfo *);
108 static int      lacp_aggregator_is_compatible(const struct lacp_aggregator *,
109                     const struct lacp_port *);
110 static int      lacp_peerinfo_is_compatible(const struct lacp_peerinfo *,
111                     const struct lacp_peerinfo *);
112 
113 static struct lacp_aggregator *lacp_aggregator_get(struct lacp_softc *,
114                     struct lacp_port *);
115 static void     lacp_aggregator_addref(struct lacp_softc *,
116                     struct lacp_aggregator *);
117 static void     lacp_aggregator_delref(struct lacp_softc *,
118                     struct lacp_aggregator *);
119 
120 /* receive machine */
121 
122 static int      lacp_pdu_input(struct lacp_port *, struct mbuf *);
123 static int      lacp_marker_input(struct lacp_port *, struct mbuf *);
124 static void     lacp_sm_rx(struct lacp_port *, const struct lacpdu *);
125 static void     lacp_sm_rx_timer(struct lacp_port *);
126 static void     lacp_sm_rx_set_expired(struct lacp_port *);
127 static void     lacp_sm_rx_update_ntt(struct lacp_port *,
128                     const struct lacpdu *);
129 static void     lacp_sm_rx_record_pdu(struct lacp_port *,
130                     const struct lacpdu *);
131 static void     lacp_sm_rx_update_selected(struct lacp_port *,
132                     const struct lacpdu *);
133 static void     lacp_sm_rx_record_default(struct lacp_port *);
134 static void     lacp_sm_rx_update_default_selected(struct lacp_port *);
135 static void     lacp_sm_rx_update_selected_from_peerinfo(struct lacp_port *,
136                     const struct lacp_peerinfo *);
137 
138 /* mux machine */
139 
140 static void     lacp_sm_mux(struct lacp_port *);
141 static void     lacp_set_mux(struct lacp_port *, enum lacp_mux_state);
142 static void     lacp_sm_mux_timer(struct lacp_port *);
143 
144 /* periodic transmit machine */
145 
146 static void     lacp_sm_ptx_update_timeout(struct lacp_port *, uint8_t);
147 static void     lacp_sm_ptx_tx_schedule(struct lacp_port *);
148 static void     lacp_sm_ptx_timer(struct lacp_port *);
149 
150 /* transmit machine */
151 
152 static void     lacp_sm_tx(struct lacp_port *);
153 static void     lacp_sm_assert_ntt(struct lacp_port *);
154 
155 static void     lacp_run_timers(struct lacp_port *);
156 static int      lacp_compare_peerinfo(const struct lacp_peerinfo *,
157                     const struct lacp_peerinfo *);
158 static int      lacp_compare_systemid(const struct lacp_systemid *,
159                     const struct lacp_systemid *);
160 static void     lacp_port_enable(struct lacp_port *);
161 static void     lacp_port_disable(struct lacp_port *);
162 static void     lacp_select(struct lacp_port *);
163 static void     lacp_unselect(struct lacp_port *);
164 static void     lacp_disable_collecting(struct lacp_port *);
165 static void     lacp_enable_collecting(struct lacp_port *);
166 static void     lacp_disable_distributing(struct lacp_port *);
167 static void     lacp_enable_distributing(struct lacp_port *);
168 static int      lacp_xmit_lacpdu(struct lacp_port *);
169 static int      lacp_xmit_marker(struct lacp_port *);
170 
171 #if defined(LACP_DEBUG)
172 static void     lacp_dump_lacpdu(const struct lacpdu *);
173 static const char *lacp_format_partner(const struct lacp_peerinfo *, char *,
174                     size_t);
175 static const char *lacp_format_lagid(const struct lacp_peerinfo *,
176                     const struct lacp_peerinfo *, char *, size_t);
177 static const char *lacp_format_lagid_aggregator(const struct lacp_aggregator *,
178                     char *, size_t);
179 static const char *lacp_format_state(uint8_t, char *, size_t);
180 static const char *lacp_format_mac(const uint8_t *, char *, size_t);
181 static const char *lacp_format_systemid(const struct lacp_systemid *, char *,
182                     size_t);
183 static const char *lacp_format_portid(const struct lacp_portid *, char *,
184                     size_t);
185 static void     lacp_dprintf(const struct lacp_port *, const char *, ...)
186                     __attribute__((__format__(__printf__, 2, 3)));
187 #define LACP_DPRINTF(a) lacp_dprintf a
188 #else
189 #define LACP_DPRINTF(a) /* nothing */
190 #endif
191 
192 /*
193  * partner administration variables.
194  * XXX should be configurable.
195  */
196 
197 static const struct lacp_peerinfo lacp_partner_admin = {
198         .lip_systemid = { .lsi_prio = 0xffff },
199         .lip_portid = { .lpi_prio = 0xffff },
200 #if 1
201         /* optimistic */
202         .lip_state = LACP_STATE_SYNC | LACP_STATE_AGGREGATION |
203             LACP_STATE_COLLECTING | LACP_STATE_DISTRIBUTING,
204 #else
205         /* pessimistic */
206         .lip_state = 0,
207 #endif
208 };
209 
210 static const lacp_timer_func_t lacp_timer_funcs[LACP_NTIMER] = {
211         [LACP_TIMER_CURRENT_WHILE] = lacp_sm_rx_timer,
212         [LACP_TIMER_PERIODIC] = lacp_sm_ptx_timer,
213         [LACP_TIMER_WAIT_WHILE] = lacp_sm_mux_timer,
214 };
215 
216 struct mbuf *
217 lacp_input(struct lagg_port *lgp, struct mbuf *m)
218 {
219         struct lacp_port *lp = LACP_PORT(lgp);
220         uint8_t subtype;
221 
222         if (m->m_pkthdr.len < sizeof(struct ether_header) + sizeof(subtype)) {
223                 m_freem(m);
224                 return (NULL);
225         }
226 
227         m_copydata(m, sizeof(struct ether_header), sizeof(subtype), &subtype);
228         switch (subtype) {
229                 case SLOWPROTOCOLS_SUBTYPE_LACP:
230                         lacp_pdu_input(lp, m);
231                         return (NULL);
232 
233                 case SLOWPROTOCOLS_SUBTYPE_MARKER:
234                         lacp_marker_input(lp, m);
235                         return (NULL);
236         }
237 
238         /* Not a subtype we are interested in */
239         return (m);
240 }
241 
242 /*
243  * lacp_pdu_input: process lacpdu
244  */
245 static int
246 lacp_pdu_input(struct lacp_port *lp, struct mbuf *m)
247 {
248         struct lacp_softc *lsc = lp->lp_lsc;
249         struct lacpdu *du;
250         int error = 0;
251 
252         if (m->m_pkthdr.len != sizeof(*du)) {
253                 goto bad;
254         }
255 
256         if ((m->m_flags & M_MCAST) == 0) {
257                 goto bad;
258         }
259 
260         if (m->m_len < sizeof(*du)) {
261                 m = m_pullup(m, sizeof(*du));
262                 if (m == NULL) {
263                         return (ENOMEM);
264                 }
265         }
266 
267         du = mtod(m, struct lacpdu *);
268 
269         if (memcmp(&du->ldu_eh.ether_dhost,
270             &ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN)) {
271                 goto bad;
272         }
273 
274         /*
275          * ignore the version for compatibility with
276          * the future protocol revisions.
277          */
278 #if 0
279         if (du->ldu_sph.sph_version != 1) {
280                 goto bad;
281         }
282 #endif
283 
284         /*
285          * ignore tlv types for compatibility with
286          * the future protocol revisions.
287          */
288         if (tlv_check(du, sizeof(*du), &du->ldu_tlv_actor,
289             lacp_info_tlv_template, FALSE)) {
290                 goto bad;
291         }
292 
293 #if defined(LACP_DEBUG)
294         LACP_DPRINTF((lp, "lacpdu receive\n"));
295         lacp_dump_lacpdu(du);
296 #endif /* defined(LACP_DEBUG) */
297 
298         LACP_LOCK(lsc);
299         lacp_sm_rx(lp, du);
300         LACP_UNLOCK(lsc);
301 
302         m_freem(m);
303         return (error);
304 
305 bad:
306         m_freem(m);
307         return (EINVAL);
308 }
309 
310 static void
311 lacp_fill_actorinfo(struct lacp_port *lp, struct lacp_peerinfo *info)
312 {
313         struct lagg_port *lgp = lp->lp_lagg;
314         struct lagg_softc *sc = lgp->lp_softc;
315 
316         info->lip_systemid.lsi_prio = htons(LACP_SYSTEM_PRIO);
317         memcpy(&info->lip_systemid.lsi_mac,
318             IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
319         info->lip_portid.lpi_prio = htons(LACP_PORT_PRIO);
320         info->lip_portid.lpi_portno = htons(lp->lp_ifp->if_index);
321         info->lip_state = lp->lp_state;
322 }
323 
324 static void
325 lacp_fill_markerinfo(struct lacp_port *lp, struct lacp_markerinfo *info)
326 {
327         struct ifnet *ifp = lp->lp_ifp;
328 
329         /* Fill in the port index and system id (encoded as the MAC) */
330         info->mi_rq_port = htons(ifp->if_index);
331         memcpy(&info->mi_rq_system, lp->lp_systemid.lsi_mac, ETHER_ADDR_LEN);
332         info->mi_rq_xid = htonl(0);
333 }
334 
335 static int
336 lacp_xmit_lacpdu(struct lacp_port *lp)
337 {
338         struct lagg_port *lgp = lp->lp_lagg;
339         struct mbuf *m;
340         struct lacpdu *du;
341         int error;
342 
343         LACP_LOCK_ASSERT(lp->lp_lsc);
344 
345         m = m_gethdr(M_DONTWAIT, MT_DATA);
346         if (m == NULL) {
347                 return (ENOMEM);
348         }
349         m->m_len = m->m_pkthdr.len = sizeof(*du);
350 
351         du = mtod(m, struct lacpdu *);
352         memset(du, 0, sizeof(*du));
353 
354         memcpy(&du->ldu_eh.ether_dhost, ethermulticastaddr_slowprotocols,
355             ETHER_ADDR_LEN);
356         memcpy(&du->ldu_eh.ether_shost, lgp->lp_lladdr, ETHER_ADDR_LEN);
357         du->ldu_eh.ether_type = htons(ETHERTYPE_SLOW);
358 
359         du->ldu_sph.sph_subtype = SLOWPROTOCOLS_SUBTYPE_LACP;
360         du->ldu_sph.sph_version = 1;
361 
362         TLV_SET(&du->ldu_tlv_actor, LACP_TYPE_ACTORINFO, sizeof(du->ldu_actor));
363         du->ldu_actor = lp->lp_actor;
364 
365         TLV_SET(&du->ldu_tlv_partner, LACP_TYPE_PARTNERINFO,
366             sizeof(du->ldu_partner));
367         du->ldu_partner = lp->lp_partner;
368 
369         TLV_SET(&du->ldu_tlv_collector, LACP_TYPE_COLLECTORINFO,
370             sizeof(du->ldu_collector));
371         du->ldu_collector.lci_maxdelay = 0;
372 
373 #if defined(LACP_DEBUG)
374         LACP_DPRINTF((lp, "lacpdu transmit\n"));
375         lacp_dump_lacpdu(du);
376 #endif /* defined(LACP_DEBUG) */
377 
378         m->m_flags |= M_MCAST;
379 
380         /*
381          * XXX should use higher priority queue.
382          * otherwise network congestion can break aggregation.
383          */
384 
385         error = lagg_enqueue(lp->lp_ifp, m);
386         return (error);
387 }
388 
389 static int
390 lacp_xmit_marker(struct lacp_port *lp)
391 {
392         struct lagg_port *lgp = lp->lp_lagg;
393         struct mbuf *m;
394         struct markerdu *mdu;
395         int error;
396 
397         LACP_LOCK_ASSERT(lp->lp_lsc);
398 
399         m = m_gethdr(M_DONTWAIT, MT_DATA);
400         if (m == NULL) {
401                 return (ENOMEM);
402         }
403         m->m_len = m->m_pkthdr.len = sizeof(*mdu);
404 
405         mdu = mtod(m, struct markerdu *);
406         memset(mdu, 0, sizeof(*mdu));
407 
408         memcpy(&mdu->mdu_eh.ether_dhost, ethermulticastaddr_slowprotocols,
409             ETHER_ADDR_LEN);
410         memcpy(&mdu->mdu_eh.ether_shost, lgp->lp_lladdr, ETHER_ADDR_LEN);
411         mdu->mdu_eh.ether_type = htons(ETHERTYPE_SLOW);
412 
413         mdu->mdu_sph.sph_subtype = SLOWPROTOCOLS_SUBTYPE_MARKER;
414         mdu->mdu_sph.sph_version = 1;
415 
416         /* Bump the transaction id and copy over the marker info */
417         lp->lp_marker.mi_rq_xid = htonl(ntohl(lp->lp_marker.mi_rq_xid) + 1);
418         TLV_SET(&mdu->mdu_tlv, MARKER_TYPE_INFO, sizeof(mdu->mdu_info));
419         mdu->mdu_info = lp->lp_marker;
420 
421         LACP_DPRINTF((lp, "marker transmit, port=%u, sys=%6D, id=%u\n",
422             ntohs(mdu->mdu_info.mi_rq_port), mdu->mdu_info.mi_rq_system, ":",
423             ntohl(mdu->mdu_info.mi_rq_xid)));
424 
425         m->m_flags |= M_MCAST;
426         error = lagg_enqueue(lp->lp_ifp, m);
427         return (error);
428 }
429 
430 void
431 lacp_linkstate(struct lagg_port *lgp)
432 {
433         struct lacp_port *lp = LACP_PORT(lgp);
434         struct lacp_softc *lsc = lp->lp_lsc;
435         struct ifnet *ifp = lgp->lp_ifp;
436         struct ifmediareq ifmr;
437         int error = 0;
438         u_int media;
439         uint8_t old_state;
440         uint16_t old_key;
441 
442         bzero((char *)&ifmr, sizeof(ifmr));
443         error = (*ifp->if_ioctl)(ifp, SIOCGIFMEDIA, (caddr_t)&ifmr);
444         if (error != 0)
445                 return;
446 
447         LACP_LOCK(lsc);
448         media = ifmr.ifm_active;
449         LACP_DPRINTF((lp, "media changed 0x%x -> 0x%x, ether = %d, fdx = %d, "
450             "link = %d\n", lp->lp_media, media, IFM_TYPE(media) == IFM_ETHER,
451             (media & IFM_FDX) != 0, ifp->if_link_state == LINK_STATE_UP));
452         old_state = lp->lp_state;
453         old_key = lp->lp_key;
454 
455         lp->lp_media = media;
456         /*
457          * If the port is not an active full duplex Ethernet link then it can
458          * not be aggregated.
459          */
460         if (IFM_TYPE(media) != IFM_ETHER || (media & IFM_FDX) == 0 ||
461             ifp->if_link_state != LINK_STATE_UP) {
462                 lacp_port_disable(lp);
463         } else {
464                 lacp_port_enable(lp);
465         }
466         lp->lp_key = lacp_compose_key(lp);
467 
468         if (old_state != lp->lp_state || old_key != lp->lp_key) {
469                 LACP_DPRINTF((lp, "-> UNSELECTED\n"));
470                 lp->lp_selected = LACP_UNSELECTED;
471         }
472         LACP_UNLOCK(lsc);
473 }
474 
475 static void
476 lacp_tick(void *arg)
477 {
478         struct lacp_softc *lsc = arg;
479         struct lacp_port *lp;
480 
481         LIST_FOREACH(lp, &lsc->lsc_ports, lp_next) {
482                 if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0)
483                         continue;
484 
485                 lacp_run_timers(lp);
486 
487                 lacp_select(lp);
488                 lacp_sm_mux(lp);
489                 lacp_sm_tx(lp);
490                 lacp_sm_ptx_tx_schedule(lp);
491         }
492         callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc);
493 }
494 
495 int
496 lacp_port_create(struct lagg_port *lgp)
497 {
498         struct lagg_softc *sc = lgp->lp_softc;
499         struct lacp_softc *lsc = LACP_SOFTC(sc);
500         struct lacp_port *lp;
501         struct ifnet *ifp = lgp->lp_ifp;
502         struct sockaddr_dl sdl;
503         struct ifmultiaddr *rifma = NULL;
504         int error;
505 
506         boolean_t active = TRUE; /* XXX should be configurable */
507         boolean_t fast = FALSE; /* XXX should be configurable */
508 
509         bzero((char *)&sdl, sizeof(sdl));
510         sdl.sdl_len = sizeof(sdl);
511         sdl.sdl_family = AF_LINK;
512         sdl.sdl_index = ifp->if_index;
513         sdl.sdl_type = IFT_ETHER;
514         sdl.sdl_alen = ETHER_ADDR_LEN;
515 
516         bcopy(&ethermulticastaddr_slowprotocols,
517             LLADDR(&sdl), ETHER_ADDR_LEN);
518         error = if_addmulti(ifp, (struct sockaddr *)&sdl, &rifma);
519         if (error) {
520                 printf("%s: ADDMULTI failed on %s\n", __func__, lgp->lp_ifname);
521                 return (error);
522         }
523 
524         lp = malloc(sizeof(struct lacp_port),
525             M_DEVBUF, M_NOWAIT|M_ZERO);
526         if (lp == NULL)
527                 return (ENOMEM);
528 
529         LACP_LOCK(lsc);
530         lgp->lp_psc = (caddr_t)lp;
531         lp->lp_ifp = ifp;
532         lp->lp_lagg = lgp;
533         lp->lp_lsc = lsc;
534         lp->lp_ifma = rifma;
535 
536         LIST_INSERT_HEAD(&lsc->lsc_ports, lp, lp_next);
537 
538         lacp_fill_actorinfo(lp, &lp->lp_actor);
539         lacp_fill_markerinfo(lp, &lp->lp_marker);
540         lp->lp_state =
541             (active ? LACP_STATE_ACTIVITY : 0) |
542             (fast ? LACP_STATE_TIMEOUT : 0);
543         lp->lp_aggregator = NULL;
544         lacp_sm_rx_set_expired(lp);
545         LACP_UNLOCK(lsc);
546         lacp_linkstate(lgp);
547 
548         return (0);
549 }
550 
551 void
552 lacp_port_destroy(struct lagg_port *lgp)
553 {
554         struct lacp_port *lp = LACP_PORT(lgp);
555         struct lacp_softc *lsc = lp->lp_lsc;
556         int i;
557 
558         LACP_LOCK(lsc);
559         for (i = 0; i < LACP_NTIMER; i++) {
560                 LACP_TIMER_DISARM(lp, i);
561         }
562 
563         lacp_disable_collecting(lp);
564         lacp_disable_distributing(lp);
565         lacp_unselect(lp);
566 
567         /* The address may have already been removed by if_purgemaddrs() */
568         if (!lgp->lp_detaching)
569                 if_delmulti_ifma(lp->lp_ifma);
570 
571         LIST_REMOVE(lp, lp_next);
572         LACP_UNLOCK(lsc);
573         free(lp, M_DEVBUF);
574 }
575 
576 void
577 lacp_req(struct lagg_softc *sc, caddr_t data)
578 {
579         struct lacp_opreq *req = (struct lacp_opreq *)data;
580         struct lacp_softc *lsc = LACP_SOFTC(sc);
581         struct lacp_aggregator *la = lsc->lsc_active_aggregator;
582 
583         LACP_LOCK(lsc);
584         bzero(req, sizeof(struct lacp_opreq));
585         if (la != NULL) {
586                 req->actor_prio = ntohs(la->la_actor.lip_systemid.lsi_prio);
587                 memcpy(&req->actor_mac, &la->la_actor.lip_systemid.lsi_mac,
588                     ETHER_ADDR_LEN);
589                 req->actor_key = ntohs(la->la_actor.lip_key);
590                 req->actor_portprio = ntohs(la->la_actor.lip_portid.lpi_prio);
591                 req->actor_portno = ntohs(la->la_actor.lip_portid.lpi_portno);
592                 req->actor_state = la->la_actor.lip_state;
593 
594                 req->partner_prio = ntohs(la->la_partner.lip_systemid.lsi_prio);
595                 memcpy(&req->partner_mac, &la->la_partner.lip_systemid.lsi_mac,
596                     ETHER_ADDR_LEN);
597                 req->partner_key = ntohs(la->la_partner.lip_key);
598                 req->partner_portprio = ntohs(la->la_partner.lip_portid.lpi_prio);
599                 req->partner_portno = ntohs(la->la_partner.lip_portid.lpi_portno);
600                 req->partner_state = la->la_partner.lip_state;
601         }
602         LACP_UNLOCK(lsc);
603 }
604 
605 void
606 lacp_portreq(struct lagg_port *lgp, caddr_t data)
607 {
608         struct lacp_opreq *req = (struct lacp_opreq *)data;
609         struct lacp_port *lp = LACP_PORT(lgp);
610         struct lacp_softc *lsc = lp->lp_lsc;
611 
612         LACP_LOCK(lsc);
613         req->actor_prio = ntohs(lp->lp_actor.lip_systemid.lsi_prio);
614         memcpy(&req->actor_mac, &lp->lp_actor.lip_systemid.lsi_mac,
615             ETHER_ADDR_LEN);
616         req->actor_key = ntohs(lp->lp_actor.lip_key);
617         req->actor_portprio = ntohs(lp->lp_actor.lip_portid.lpi_prio);
618         req->actor_portno = ntohs(lp->lp_actor.lip_portid.lpi_portno);
619         req->actor_state = lp->lp_actor.lip_state;
620 
621         req->partner_prio = ntohs(lp->lp_partner.lip_systemid.lsi_prio);
622         memcpy(&req->partner_mac, &lp->lp_partner.lip_systemid.lsi_mac,
623             ETHER_ADDR_LEN);
624         req->partner_key = ntohs(lp->lp_partner.lip_key);
625         req->partner_portprio = ntohs(lp->lp_partner.lip_portid.lpi_prio);
626         req->partner_portno = ntohs(lp->lp_partner.lip_portid.lpi_portno);
627         req->partner_state = lp->lp_partner.lip_state;
628         LACP_UNLOCK(lsc);
629 }
630 
631 static void
632 lacp_disable_collecting(struct lacp_port *lp)
633 {
634         LACP_DPRINTF((lp, "collecting disabled\n"));
635         lp->lp_state &= ~LACP_STATE_COLLECTING;
636 }
637 
638 static void
639 lacp_enable_collecting(struct lacp_port *lp)
640 {
641         LACP_DPRINTF((lp, "collecting enabled\n"));
642         lp->lp_state |= LACP_STATE_COLLECTING;
643 }
644 
645 static void
646 lacp_disable_distributing(struct lacp_port *lp)
647 {
648         struct lacp_aggregator *la = lp->lp_aggregator;
649         struct lacp_softc *lsc = lp->lp_lsc;
650 #if defined(LACP_DEBUG)
651         char buf[LACP_LAGIDSTR_MAX+1];
652 #endif /* defined(LACP_DEBUG) */
653 
654         LACP_LOCK_ASSERT(lsc);
655 
656         if (la == NULL || (lp->lp_state & LACP_STATE_DISTRIBUTING) == 0) {
657                 return;
658         }
659 
660         KASSERT(!TAILQ_EMPTY(&la->la_ports), ("no aggregator ports"));
661         KASSERT(la->la_nports > 0, ("nports invalid (%d)", la->la_nports));
662         KASSERT(la->la_refcnt >= la->la_nports, ("aggregator refcnt invalid"));
663 
664         LACP_DPRINTF((lp, "disable distributing on aggregator %s, "
665             "nports %d -> %d\n",
666             lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
667             la->la_nports, la->la_nports - 1));
668 
669         TAILQ_REMOVE(&la->la_ports, lp, lp_dist_q);
670         la->la_nports--;
671 
672         if (lsc->lsc_active_aggregator == la) {
673                 lacp_suppress_distributing(lsc, la);
674                 lacp_select_active_aggregator(lsc);
675                 /* regenerate the port map, the active aggregator has changed */
676                 lacp_update_portmap(lsc);
677         }
678 
679         lp->lp_state &= ~LACP_STATE_DISTRIBUTING;
680 }
681 
682 static void
683 lacp_enable_distributing(struct lacp_port *lp)
684 {
685         struct lacp_aggregator *la = lp->lp_aggregator;
686         struct lacp_softc *lsc = lp->lp_lsc;
687 #if defined(LACP_DEBUG)
688         char buf[LACP_LAGIDSTR_MAX+1];
689 #endif /* defined(LACP_DEBUG) */
690 
691         LACP_LOCK_ASSERT(lsc);
692 
693         if ((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0) {
694                 return;
695         }
696 
697         LACP_DPRINTF((lp, "enable distributing on aggregator %s, "
698             "nports %d -> %d\n",
699             lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
700             la->la_nports, la->la_nports + 1));
701 
702         KASSERT(la->la_refcnt > la->la_nports, ("aggregator refcnt invalid"));
703         TAILQ_INSERT_HEAD(&la->la_ports, lp, lp_dist_q);
704         la->la_nports++;
705 
706         lp->lp_state |= LACP_STATE_DISTRIBUTING;
707 
708         if (lsc->lsc_active_aggregator == la) {
709                 lacp_suppress_distributing(lsc, la);
710                 lacp_update_portmap(lsc);
711         } else
712                 /* try to become the active aggregator */
713                 lacp_select_active_aggregator(lsc);
714 }
715 
716 static void
717 lacp_transit_expire(void *vp)
718 {
719         struct lacp_softc *lsc = vp;
720 
721         LACP_LOCK_ASSERT(lsc);
722 
723         LACP_DPRINTF((NULL, "%s\n", __func__));
724         lsc->lsc_suppress_distributing = FALSE;
725 }
726 
727 int
728 lacp_attach(struct lagg_softc *sc)
729 {
730         struct lacp_softc *lsc;
731 
732         lsc = malloc(sizeof(struct lacp_softc),
733             M_DEVBUF, M_NOWAIT|M_ZERO);
734         if (lsc == NULL)
735                 return (ENOMEM);
736 
737         sc->sc_psc = (caddr_t)lsc;
738         lsc->lsc_softc = sc;
739 
740         lsc->lsc_hashkey = arc4random();
741         lsc->lsc_active_aggregator = NULL;
742         LACP_LOCK_INIT(lsc);
743         TAILQ_INIT(&lsc->lsc_aggregators);
744         LIST_INIT(&lsc->lsc_ports);
745 
746         callout_init_mtx(&lsc->lsc_transit_callout, &lsc->lsc_mtx, 0);
747         callout_init_mtx(&lsc->lsc_callout, &lsc->lsc_mtx, 0);
748 
749         /* if the lagg is already up then do the same */
750         if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING)
751                 lacp_init(sc);
752 
753         return (0);
754 }
755 
756 int
757 lacp_detach(struct lagg_softc *sc)
758 {
759         struct lacp_softc *lsc = LACP_SOFTC(sc);
760 
761         KASSERT(TAILQ_EMPTY(&lsc->lsc_aggregators),
762             ("aggregators still active"));
763         KASSERT(lsc->lsc_active_aggregator == NULL,
764             ("aggregator still attached"));
765 
766         sc->sc_psc = NULL;
767         callout_drain(&lsc->lsc_transit_callout);
768         callout_drain(&lsc->lsc_callout);
769 
770         LACP_LOCK_DESTROY(lsc);
771         free(lsc, M_DEVBUF);
772         return (0);
773 }
774 
775 void
776 lacp_init(struct lagg_softc *sc)
777 {
778         struct lacp_softc *lsc = LACP_SOFTC(sc);
779 
780         LACP_LOCK(lsc);
781         callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc);
782         LACP_UNLOCK(lsc);
783 }
784 
785 void
786 lacp_stop(struct lagg_softc *sc)
787 {
788         struct lacp_softc *lsc = LACP_SOFTC(sc);
789 
790         LACP_LOCK(lsc);
791         callout_stop(&lsc->lsc_transit_callout);
792         callout_stop(&lsc->lsc_callout);
793         LACP_UNLOCK(lsc);
794 }
795 
796 struct lagg_port *
797 lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m)
798 {
799         struct lacp_softc *lsc = LACP_SOFTC(sc);
800         struct lacp_portmap *pm;
801         struct lacp_port *lp;
802         uint32_t hash;
803 
804         if (__predict_false(lsc->lsc_suppress_distributing)) {
805                 LACP_DPRINTF((NULL, "%s: waiting transit\n", __func__));
806                 return (NULL);
807         }
808 
809         pm = &lsc->lsc_pmap[lsc->lsc_activemap];
810         if (pm->pm_count == 0) {
811                 LACP_DPRINTF((NULL, "%s: no active aggregator\n", __func__));
812                 return (NULL);
813         }
814 
815         hash = lagg_hashmbuf(m, lsc->lsc_hashkey);
816         hash %= pm->pm_count;
817         lp = pm->pm_map[hash];
818 
819         KASSERT((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0,
820             ("aggregated port is not distributing"));
821 
822         return (lp->lp_lagg);
823 }
824 /*
825  * lacp_suppress_distributing: drop transmit packets for a while
826  * to preserve packet ordering.
827  */
828 
829 static void
830 lacp_suppress_distributing(struct lacp_softc *lsc, struct lacp_aggregator *la)
831 {
832         struct lacp_port *lp;
833 
834         if (lsc->lsc_active_aggregator != la) {
835                 return;
836         }
837 
838         LACP_DPRINTF((NULL, "%s\n", __func__));
839         lsc->lsc_suppress_distributing = TRUE;
840 
841         /* send a marker frame down each port to verify the queues are empty */
842         LIST_FOREACH(lp, &lsc->lsc_ports, lp_next) {
843                 lp->lp_flags |= LACP_PORT_MARK;
844                 lacp_xmit_marker(lp);
845         }
846 
847         /* set a timeout for the marker frames */
848         callout_reset(&lsc->lsc_transit_callout,
849             LACP_TRANSIT_DELAY * hz / 1000, lacp_transit_expire, lsc);
850 }
851 
852 static int
853 lacp_compare_peerinfo(const struct lacp_peerinfo *a,
854     const struct lacp_peerinfo *b)
855 {
856         return (memcmp(a, b, offsetof(struct lacp_peerinfo, lip_state)));
857 }
858 
859 static int
860 lacp_compare_systemid(const struct lacp_systemid *a,
861     const struct lacp_systemid *b)
862 {
863         return (memcmp(a, b, sizeof(*a)));
864 }
865 
#if 0   /* unused */
/*
 * Byte-wise comparison of two complete port id structures.  Returns the
 * memcmp() result, so zero means the two are identical.
 */
static int
lacp_compare_portid(const struct lacp_portid *a,
    const struct lacp_portid *b)
{
        const size_t cmplen = sizeof(*a);

        return (memcmp(a, b, cmplen));
}
#endif
874 
875 static uint64_t
876 lacp_aggregator_bandwidth(struct lacp_aggregator *la)
877 {
878         struct lacp_port *lp;
879         uint64_t speed;
880 
881         lp = TAILQ_FIRST(&la->la_ports);
882         if (lp == NULL) {
883                 return (0);
884         }
885 
886         speed = ifmedia_baudrate(lp->lp_media);
887         speed *= la->la_nports;
888         if (speed == 0) {
889                 LACP_DPRINTF((lp, "speed 0? media=0x%x nports=%d\n",
890                     lp->lp_media, la->la_nports));
891         }
892 
893         return (speed);
894 }
895 
896 /*
897  * lacp_select_active_aggregator: select an aggregator to be used to transmit
898  * packets from lagg(4) interface.
899  */
900 
901 static void
902 lacp_select_active_aggregator(struct lacp_softc *lsc)
903 {
904         struct lacp_aggregator *la;
905         struct lacp_aggregator *best_la = NULL;
906         uint64_t best_speed = 0;
907 #if defined(LACP_DEBUG)
908         char buf[LACP_LAGIDSTR_MAX+1];
909 #endif /* defined(LACP_DEBUG) */
910 
911         LACP_DPRINTF((NULL, "%s:\n", __func__));
912 
913         TAILQ_FOREACH(la, &lsc->lsc_aggregators, la_q) {
914                 uint64_t speed;
915 
916                 if (la->la_nports == 0) {
917                         continue;
918                 }
919 
920                 speed = lacp_aggregator_bandwidth(la);
921                 LACP_DPRINTF((NULL, "%s, speed=%jd, nports=%d\n",
922                     lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
923                     speed, la->la_nports));
924 
925                 /* This aggregator is chosen if
926                  *      the partner has a better system priority
927                  *  or, the total aggregated speed is higher
928                  *  or, it is already the chosen aggregator
929                  */
930                 if ((best_la != NULL && LACP_SYS_PRI(la->la_partner) <
931                      LACP_SYS_PRI(best_la->la_partner)) ||
932                     speed > best_speed ||
933                     (speed == best_speed &&
934                     la == lsc->lsc_active_aggregator)) {
935                         best_la = la;
936                         best_speed = speed;
937                 }
938         }
939 
940         KASSERT(best_la == NULL || best_la->la_nports > 0,
941             ("invalid aggregator refcnt"));
942         KASSERT(best_la == NULL || !TAILQ_EMPTY(&best_la->la_ports),
943             ("invalid aggregator list"));
944 
945 #if defined(LACP_DEBUG)
946         if (lsc->lsc_active_aggregator != best_la) {
947                 LACP_DPRINTF((NULL, "active aggregator changed\n"));
948                 LACP_DPRINTF((NULL, "old %s\n",
949                     lacp_format_lagid_aggregator(lsc->lsc_active_aggregator,
950                     buf, sizeof(buf))));
951         } else {
952                 LACP_DPRINTF((NULL, "active aggregator not changed\n"));
953         }
954         LACP_DPRINTF((NULL, "new %s\n",
955             lacp_format_lagid_aggregator(best_la, buf, sizeof(buf))));
956 #endif /* defined(LACP_DEBUG) */
957 
958         if (lsc->lsc_active_aggregator != best_la) {
959                 lsc->lsc_active_aggregator = best_la;
960                 lacp_update_portmap(lsc);
961                 if (best_la) {
962                         lacp_suppress_distributing(lsc, best_la);
963                 }
964         }
965 }
966 
967 /*
968  * Update the inactive portmap array with the new list of ports and
969  * make it live.
970  */
971