FreeBSD/Linux Kernel Cross Reference
sys/net/if_lagg.c


    1 /*      $OpenBSD: if_trunk.c,v 1.30 2007/01/31 06:20:19 reyk Exp $      */
    2 
    3 /*
    4  * Copyright (c) 2005, 2006 Reyk Floeter <reyk@openbsd.org>
    5  * Copyright (c) 2007 Andrew Thompson <thompsa@FreeBSD.org>
    6  * Copyright (c) 2014, 2016 Marcelo Araujo <araujo@FreeBSD.org>
    7  *
    8  * Permission to use, copy, modify, and distribute this software for any
    9  * purpose with or without fee is hereby granted, provided that the above
   10  * copyright notice and this permission notice appear in all copies.
   11  *
   12  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
   13  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
   14  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
   15  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
   16  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
   17  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
   18  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
   19  */
   20 
   21 #include <sys/cdefs.h>
   22 __FBSDID("$FreeBSD: releng/11.1/sys/net/if_lagg.c 319697 2017-06-08 14:34:39Z mav $");
   23 
   24 #include "opt_inet.h"
   25 #include "opt_inet6.h"
   26 
   27 #include <sys/param.h>
   28 #include <sys/kernel.h>
   29 #include <sys/malloc.h>
   30 #include <sys/mbuf.h>
   31 #include <sys/queue.h>
   32 #include <sys/socket.h>
   33 #include <sys/sockio.h>
   34 #include <sys/sysctl.h>
   35 #include <sys/module.h>
   36 #include <sys/priv.h>
   37 #include <sys/systm.h>
   38 #include <sys/proc.h>
   39 #include <sys/lock.h>
   40 #include <sys/rmlock.h>
   41 #include <sys/sx.h>
   42 #include <sys/taskqueue.h>
   43 #include <sys/eventhandler.h>
   44 
   45 #include <net/ethernet.h>
   46 #include <net/if.h>
   47 #include <net/if_clone.h>
   48 #include <net/if_arp.h>
   49 #include <net/if_dl.h>
   50 #include <net/if_media.h>
   51 #include <net/if_types.h>
   52 #include <net/if_var.h>
   53 #include <net/bpf.h>
   54 #include <net/vnet.h>
   55 
   56 #if defined(INET) || defined(INET6)
   57 #include <netinet/in.h>
   58 #include <netinet/ip.h>
   59 #endif
   60 #ifdef INET
   61 #include <netinet/in_systm.h>
   62 #include <netinet/if_ether.h>
   63 #endif
   64 
   65 #ifdef INET6
   66 #include <netinet/ip6.h>
   67 #include <netinet6/in6_var.h>
   68 #include <netinet6/in6_ifattach.h>
   69 #endif
   70 
   71 #include <net/if_vlan_var.h>
   72 #include <net/if_lagg.h>
   73 #include <net/ieee8023ad_lacp.h>
   74 
   75 /* Special flags we should propagate to the lagg ports. */
   76 static struct {
   77         int flag;
   78         int (*func)(struct ifnet *, int);
   79 } lagg_pflags[] = {
   80         {IFF_PROMISC, ifpromisc},
   81         {IFF_ALLMULTI, if_allmulti},
   82         {0, NULL}
   83 };
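       /*
        * Editorial note: the lagg_setflags()/lagg_setflag() helpers declared
        * below walk this table for a member port, invoking the stored
        * handler (ifpromisc() or if_allmulti()) so that the lagg
        * interface's IFF_PROMISC and IFF_ALLMULTI state is mirrored onto
        * the port.
        */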
   84 
   85 VNET_DEFINE(SLIST_HEAD(__trhead, lagg_softc), lagg_list); /* list of laggs */
   86 #define V_lagg_list     VNET(lagg_list)
   87 static VNET_DEFINE(struct mtx, lagg_list_mtx);
   88 #define V_lagg_list_mtx VNET(lagg_list_mtx)
   89 #define LAGG_LIST_LOCK_INIT(x)          mtx_init(&V_lagg_list_mtx, \
   90                                         "if_lagg list", NULL, MTX_DEF)
   91 #define LAGG_LIST_LOCK_DESTROY(x)       mtx_destroy(&V_lagg_list_mtx)
   92 #define LAGG_LIST_LOCK(x)               mtx_lock(&V_lagg_list_mtx)
   93 #define LAGG_LIST_UNLOCK(x)             mtx_unlock(&V_lagg_list_mtx)
   94 eventhandler_tag        lagg_detach_cookie = NULL;
   95 
   96 static int      lagg_clone_create(struct if_clone *, int, caddr_t);
   97 static void     lagg_clone_destroy(struct ifnet *);
   98 static VNET_DEFINE(struct if_clone *, lagg_cloner);
   99 #define V_lagg_cloner   VNET(lagg_cloner)
  100 static const char laggname[] = "lagg";
  101 
  102 static void     lagg_capabilities(struct lagg_softc *);
  103 static int      lagg_port_create(struct lagg_softc *, struct ifnet *);
  104 static int      lagg_port_destroy(struct lagg_port *, int);
  105 static struct mbuf *lagg_input(struct ifnet *, struct mbuf *);
  106 static void     lagg_linkstate(struct lagg_softc *);
  107 static void     lagg_port_state(struct ifnet *, int);
  108 static int      lagg_port_ioctl(struct ifnet *, u_long, caddr_t);
  109 static int      lagg_port_output(struct ifnet *, struct mbuf *,
  110                     const struct sockaddr *, struct route *);
  111 static void     lagg_port_ifdetach(void *arg __unused, struct ifnet *);
  112 #ifdef LAGG_PORT_STACKING
  113 static int      lagg_port_checkstacking(struct lagg_softc *);
  114 #endif
  115 static void     lagg_port2req(struct lagg_port *, struct lagg_reqport *);
  116 static void     lagg_init(void *);
  117 static void     lagg_stop(struct lagg_softc *);
  118 static int      lagg_ioctl(struct ifnet *, u_long, caddr_t);
  119 static int      lagg_setmulti(struct lagg_port *);
  120 static int      lagg_clrmulti(struct lagg_port *);
  121 static  int     lagg_setcaps(struct lagg_port *, int cap);
  122 static  int     lagg_setflag(struct lagg_port *, int, int,
  123                     int (*func)(struct ifnet *, int));
  124 static  int     lagg_setflags(struct lagg_port *, int status);
  125 static uint64_t lagg_get_counter(struct ifnet *ifp, ift_counter cnt);
  126 static int      lagg_transmit(struct ifnet *, struct mbuf *);
  127 static void     lagg_qflush(struct ifnet *);
  128 static int      lagg_media_change(struct ifnet *);
  129 static void     lagg_media_status(struct ifnet *, struct ifmediareq *);
  130 static struct lagg_port *lagg_link_active(struct lagg_softc *,
  131             struct lagg_port *);
  132 
  133 /* Simple round robin */
  134 static void     lagg_rr_attach(struct lagg_softc *);
  135 static int      lagg_rr_start(struct lagg_softc *, struct mbuf *);
  136 static struct mbuf *lagg_rr_input(struct lagg_softc *, struct lagg_port *,
  137                     struct mbuf *);
  138 
  139 /* Active failover */
  140 static int      lagg_fail_start(struct lagg_softc *, struct mbuf *);
  141 static struct mbuf *lagg_fail_input(struct lagg_softc *, struct lagg_port *,
  142                     struct mbuf *);
  143 
  144 /* Loadbalancing */
  145 static void     lagg_lb_attach(struct lagg_softc *);
  146 static void     lagg_lb_detach(struct lagg_softc *);
  147 static int      lagg_lb_port_create(struct lagg_port *);
  148 static void     lagg_lb_port_destroy(struct lagg_port *);
  149 static int      lagg_lb_start(struct lagg_softc *, struct mbuf *);
  150 static struct mbuf *lagg_lb_input(struct lagg_softc *, struct lagg_port *,
  151                     struct mbuf *);
  152 static int      lagg_lb_porttable(struct lagg_softc *, struct lagg_port *);
  153 
  154 /* Broadcast */
  155 static int    lagg_bcast_start(struct lagg_softc *, struct mbuf *);
  156 static struct mbuf *lagg_bcast_input(struct lagg_softc *, struct lagg_port *,
  157                     struct mbuf *);
  158 
  159 /* 802.3ad LACP */
  160 static void     lagg_lacp_attach(struct lagg_softc *);
  161 static void     lagg_lacp_detach(struct lagg_softc *);
  162 static int      lagg_lacp_start(struct lagg_softc *, struct mbuf *);
  163 static struct mbuf *lagg_lacp_input(struct lagg_softc *, struct lagg_port *,
  164                     struct mbuf *);
  165 static void     lagg_lacp_lladdr(struct lagg_softc *);
  166 
  167 /* lagg protocol table */
  168 static const struct lagg_proto {
  169         lagg_proto      pr_num;
  170         void            (*pr_attach)(struct lagg_softc *);
  171         void            (*pr_detach)(struct lagg_softc *);
  172         int             (*pr_start)(struct lagg_softc *, struct mbuf *);
  173         struct mbuf *   (*pr_input)(struct lagg_softc *, struct lagg_port *,
  174                             struct mbuf *);
  175         int             (*pr_addport)(struct lagg_port *);
  176         void            (*pr_delport)(struct lagg_port *);
  177         void            (*pr_linkstate)(struct lagg_port *);
  178         void            (*pr_init)(struct lagg_softc *);
  179         void            (*pr_stop)(struct lagg_softc *);
  180         void            (*pr_lladdr)(struct lagg_softc *);
  181         void            (*pr_request)(struct lagg_softc *, void *);
  182         void            (*pr_portreq)(struct lagg_port *, void *);
  183 } lagg_protos[] = {
  184     {
  185         .pr_num = LAGG_PROTO_NONE
  186     },
  187     {
  188         .pr_num = LAGG_PROTO_ROUNDROBIN,
  189         .pr_attach = lagg_rr_attach,
  190         .pr_start = lagg_rr_start,
  191         .pr_input = lagg_rr_input,
  192     },
  193     {
  194         .pr_num = LAGG_PROTO_FAILOVER,
  195         .pr_start = lagg_fail_start,
  196         .pr_input = lagg_fail_input,
  197     },
  198     {
  199         .pr_num = LAGG_PROTO_LOADBALANCE,
  200         .pr_attach = lagg_lb_attach,
  201         .pr_detach = lagg_lb_detach,
  202         .pr_start = lagg_lb_start,
  203         .pr_input = lagg_lb_input,
  204         .pr_addport = lagg_lb_port_create,
  205         .pr_delport = lagg_lb_port_destroy,
  206     },
  207     {
  208         .pr_num = LAGG_PROTO_LACP,
  209         .pr_attach = lagg_lacp_attach,
  210         .pr_detach = lagg_lacp_detach,
  211         .pr_start = lagg_lacp_start,
  212         .pr_input = lagg_lacp_input,
  213         .pr_addport = lacp_port_create,
  214         .pr_delport = lacp_port_destroy,
  215         .pr_linkstate = lacp_linkstate,
  216         .pr_init = lacp_init,
  217         .pr_stop = lacp_stop,
  218         .pr_lladdr = lagg_lacp_lladdr,
  219         .pr_request = lacp_req,
  220         .pr_portreq = lacp_portreq,
  221     },
  222     {
  223         .pr_num = LAGG_PROTO_BROADCAST,
  224         .pr_start = lagg_bcast_start,
  225         .pr_input = lagg_bcast_input,
  226     },
  227 };
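       /*
        * Editorial note: hooks left unset in a lagg_protos[] entry are
        * optional.  The lagg_proto_*() wrappers below check for NULL before
        * dispatching (e.g. lagg_proto_addport() simply returns 0 when
        * pr_addport is NULL); only pr_start and pr_input are invoked
        * unconditionally.
        */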
  228 
  229 SYSCTL_DECL(_net_link);
  230 SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0,
  231     "Link Aggregation");
  232 
  233 /* Allow input on any failover links */
  234 static VNET_DEFINE(int, lagg_failover_rx_all);
  235 #define V_lagg_failover_rx_all  VNET(lagg_failover_rx_all)
  236 SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW | CTLFLAG_VNET,
  237     &VNET_NAME(lagg_failover_rx_all), 0,
  238     "Accept input from any interface in a failover lagg");
  239 
  240 /* Default value for using flowid */
  241 static VNET_DEFINE(int, def_use_flowid) = 1;
  242 #define V_def_use_flowid        VNET(def_use_flowid)
  243 SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RWTUN,
  244     &VNET_NAME(def_use_flowid), 0,
  245     "Default setting for using flow id for load sharing");
  246 
  247 /* Default value for flowid shift */
  248 static VNET_DEFINE(int, def_flowid_shift) = 16;
  249 #define V_def_flowid_shift      VNET(def_flowid_shift)
  250 SYSCTL_INT(_net_link_lagg, OID_AUTO, default_flowid_shift, CTLFLAG_RWTUN,
  251     &VNET_NAME(def_flowid_shift), 0,
  252     "Default setting for flowid shift for load sharing");
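       /*
        * Editorial note: the knobs above appear as sysctl(8) variables such
        * as net.link.lagg.default_use_flowid; e.g.
        * "sysctl net.link.lagg.default_use_flowid=0" changes the default
        * applied to laggs created afterwards, and CTLFLAG_RWTUN also lets
        * them be set as loader(8) tunables.
        */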
  253 
  254 static void
  255 vnet_lagg_init(const void *unused __unused)
  256 {
  257 
  258         LAGG_LIST_LOCK_INIT();
  259         SLIST_INIT(&V_lagg_list);
  260         V_lagg_cloner = if_clone_simple(laggname, lagg_clone_create,
  261             lagg_clone_destroy, 0);
  262 }
  263 VNET_SYSINIT(vnet_lagg_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
  264     vnet_lagg_init, NULL);
  265 
  266 static void
  267 vnet_lagg_uninit(const void *unused __unused)
  268 {
  269 
  270         if_clone_detach(V_lagg_cloner);
  271         LAGG_LIST_LOCK_DESTROY();
  272 }
  273 VNET_SYSUNINIT(vnet_lagg_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY,
  274     vnet_lagg_uninit, NULL);
  275 
  276 static int
  277 lagg_modevent(module_t mod, int type, void *data)
  278 {
  279 
  280         switch (type) {
  281         case MOD_LOAD:
  282                 lagg_input_p = lagg_input;
  283                 lagg_linkstate_p = lagg_port_state;
  284                 lagg_detach_cookie = EVENTHANDLER_REGISTER(
  285                     ifnet_departure_event, lagg_port_ifdetach, NULL,
  286                     EVENTHANDLER_PRI_ANY);
  287                 break;
  288         case MOD_UNLOAD:
  289                 EVENTHANDLER_DEREGISTER(ifnet_departure_event,
  290                     lagg_detach_cookie);
  291                 lagg_input_p = NULL;
  292                 lagg_linkstate_p = NULL;
  293                 break;
  294         default:
  295                 return (EOPNOTSUPP);
  296         }
  297         return (0);
  298 }
  299 
  300 static moduledata_t lagg_mod = {
  301         "if_lagg",
  302         lagg_modevent,
  303         0
  304 };
  305 
  306 DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
  307 MODULE_VERSION(if_lagg, 1);
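       /*
        * Editorial note: once this module is loaded (e.g. "kldload if_lagg")
        * or compiled into the kernel, lagg interfaces are created through
        * the cloner registered in vnet_lagg_init() above, e.g.
        * "ifconfig lagg0 create".
        */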
  308 
  309 static void
  310 lagg_proto_attach(struct lagg_softc *sc, lagg_proto pr)
  311 {
  312 
  313         LAGG_XLOCK_ASSERT(sc);
  314         KASSERT(sc->sc_proto == LAGG_PROTO_NONE, ("%s: sc %p has proto",
  315             __func__, sc));
  316 
  317         if (sc->sc_ifflags & IFF_DEBUG)
  318                 if_printf(sc->sc_ifp, "using proto %u\n", pr);
  319 
  320         if (lagg_protos[pr].pr_attach != NULL)
  321                 lagg_protos[pr].pr_attach(sc);
  322         sc->sc_proto = pr;
  323 }
  324 
  325 static void
  326 lagg_proto_detach(struct lagg_softc *sc)
  327 {
  328         lagg_proto pr;
  329 
  330         LAGG_XLOCK_ASSERT(sc);
  331         LAGG_WLOCK_ASSERT(sc);
  332         pr = sc->sc_proto;
  333         sc->sc_proto = LAGG_PROTO_NONE;
  334 
  335         if (lagg_protos[pr].pr_detach != NULL)
  336                 lagg_protos[pr].pr_detach(sc);
  337         else
  338                 LAGG_WUNLOCK(sc);
  339 }
  340 
  341 static int
  342 lagg_proto_start(struct lagg_softc *sc, struct mbuf *m)
  343 {
  344 
  345         return (lagg_protos[sc->sc_proto].pr_start(sc, m));
  346 }
  347 
  348 static struct mbuf *
  349 lagg_proto_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
  350 {
  351 
  352         return (lagg_protos[sc->sc_proto].pr_input(sc, lp, m));
  353 }
  354 
  355 static int
  356 lagg_proto_addport(struct lagg_softc *sc, struct lagg_port *lp)
  357 {
  358 
  359         if (lagg_protos[sc->sc_proto].pr_addport == NULL)
  360                 return (0);
  361         else
  362                 return (lagg_protos[sc->sc_proto].pr_addport(lp));
  363 }
  364 
  365 static void
  366 lagg_proto_delport(struct lagg_softc *sc, struct lagg_port *lp)
  367 {
  368 
  369         if (lagg_protos[sc->sc_proto].pr_delport != NULL)
  370                 lagg_protos[sc->sc_proto].pr_delport(lp);
  371 }
  372 
  373 static void
  374 lagg_proto_linkstate(struct lagg_softc *sc, struct lagg_port *lp)
  375 {
  376 
  377         if (lagg_protos[sc->sc_proto].pr_linkstate != NULL)
  378                 lagg_protos[sc->sc_proto].pr_linkstate(lp);
  379 }
  380 
  381 static void
  382 lagg_proto_init(struct lagg_softc *sc)
  383 {
  384 
  385         if (lagg_protos[sc->sc_proto].pr_init != NULL)
  386                 lagg_protos[sc->sc_proto].pr_init(sc);
  387 }
  388 
  389 static void
  390 lagg_proto_stop(struct lagg_softc *sc)
  391 {
  392 
  393         if (lagg_protos[sc->sc_proto].pr_stop != NULL)
  394                 lagg_protos[sc->sc_proto].pr_stop(sc);
  395 }
  396 
  397 static void
  398 lagg_proto_lladdr(struct lagg_softc *sc)
  399 {
  400 
  401         if (lagg_protos[sc->sc_proto].pr_lladdr != NULL)
  402                 lagg_protos[sc->sc_proto].pr_lladdr(sc);
  403 }
  404 
  405 static void
  406 lagg_proto_request(struct lagg_softc *sc, void *v)
  407 {
  408 
  409         if (lagg_protos[sc->sc_proto].pr_request != NULL)
  410                 lagg_protos[sc->sc_proto].pr_request(sc, v);
  411 }
  412 
  413 static void
  414 lagg_proto_portreq(struct lagg_softc *sc, struct lagg_port *lp, void *v)
  415 {
  416 
  417         if (lagg_protos[sc->sc_proto].pr_portreq != NULL)
  418                 lagg_protos[sc->sc_proto].pr_portreq(lp, v);
  419 }
  420 
   421 /*
   422  * This routine is run via a vlan
   423  * config EVENT.
   424  */
  425 static void
  426 lagg_register_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
  427 {
  428         struct lagg_softc *sc = ifp->if_softc;
  429         struct lagg_port *lp;
  430 
  431         if (ifp->if_softc !=  arg)   /* Not our event */
  432                 return;
  433 
  434         LAGG_SLOCK(sc);
  435         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
  436                 EVENTHANDLER_INVOKE(vlan_config, lp->lp_ifp, vtag);
  437         LAGG_SUNLOCK(sc);
  438 }
  439 
   440 /*
   441  * This routine is run via a vlan
   442  * unconfig EVENT.
   443  */
  444 static void
  445 lagg_unregister_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
  446 {
  447         struct lagg_softc *sc = ifp->if_softc;
  448         struct lagg_port *lp;
  449 
  450         if (ifp->if_softc !=  arg)   /* Not our event */
  451                 return;
  452 
  453         LAGG_SLOCK(sc);
  454         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
  455                 EVENTHANDLER_INVOKE(vlan_unconfig, lp->lp_ifp, vtag);
  456         LAGG_SUNLOCK(sc);
  457 }
  458 
  459 static int
  460 lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
  461 {
  462         struct lagg_softc *sc;
  463         struct ifnet *ifp;
  464         static const u_char eaddr[6];   /* 00:00:00:00:00:00 */
  465 
  466         sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
  467         ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
  468         if (ifp == NULL) {
  469                 free(sc, M_DEVBUF);
  470                 return (ENOSPC);
  471         }
  472         LAGG_LOCK_INIT(sc);
  473         LAGG_SX_INIT(sc);
  474 
  475         LAGG_XLOCK(sc);
  476         if (V_def_use_flowid)
  477                 sc->sc_opts |= LAGG_OPT_USE_FLOWID;
  478         sc->flowid_shift = V_def_flowid_shift;
  479 
  480         /* Hash all layers by default */
  481         sc->sc_flags = MBUF_HASHFLAG_L2|MBUF_HASHFLAG_L3|MBUF_HASHFLAG_L4;
  482 
  483         lagg_proto_attach(sc, LAGG_PROTO_DEFAULT);
  484 
  485         SLIST_INIT(&sc->sc_ports);
  486 
  487         /* Initialise pseudo media types */
  488         ifmedia_init(&sc->sc_media, 0, lagg_media_change,
  489             lagg_media_status);
  490         ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
  491         ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
  492 
  493         if_initname(ifp, laggname, unit);
  494         ifp->if_softc = sc;
  495         ifp->if_transmit = lagg_transmit;
  496         ifp->if_qflush = lagg_qflush;
  497         ifp->if_init = lagg_init;
  498         ifp->if_ioctl = lagg_ioctl;
  499         ifp->if_get_counter = lagg_get_counter;
  500         ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
  501         ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS;
  502 
  503         /*
  504          * Attach as an ordinary ethernet device, children will be attached
  505          * as special device IFT_IEEE8023ADLAG.
  506          */
  507         ether_ifattach(ifp, eaddr);
  508 
  509         sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
  510                 lagg_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
  511         sc->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
  512                 lagg_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
  513 
  514         /* Insert into the global list of laggs */
  515         LAGG_LIST_LOCK();
  516         SLIST_INSERT_HEAD(&V_lagg_list, sc, sc_entries);
  517         LAGG_LIST_UNLOCK();
  518         LAGG_XUNLOCK(sc);
  519 
  520         return (0);
  521 }
  522 
  523 static void
  524 lagg_clone_destroy(struct ifnet *ifp)
  525 {
  526         struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
  527         struct lagg_port *lp;
  528 
  529         LAGG_XLOCK(sc);
  530         sc->sc_destroying = 1;
  531         lagg_stop(sc);
  532         ifp->if_flags &= ~IFF_UP;
  533 
  534         EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach);
  535         EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach);
  536 
  537         /* Shutdown and remove lagg ports */
  538         while ((lp = SLIST_FIRST(&sc->sc_ports)) != NULL)
  539                 lagg_port_destroy(lp, 1);
  540 
  541         /* Unhook the aggregation protocol */
  542         LAGG_WLOCK(sc);
  543         lagg_proto_detach(sc);
  544         LAGG_UNLOCK_ASSERT(sc);
  545         LAGG_XUNLOCK(sc);
  546 
  547         ifmedia_removeall(&sc->sc_media);
  548         ether_ifdetach(ifp);
  549         if_free(ifp);
  550 
  551         LAGG_LIST_LOCK();
  552         SLIST_REMOVE(&V_lagg_list, sc, lagg_softc, sc_entries);
  553         LAGG_LIST_UNLOCK();
  554 
  555         LAGG_SX_DESTROY(sc);
  556         LAGG_LOCK_DESTROY(sc);
  557         free(sc, M_DEVBUF);
  558 }
  559 
  560 static void
  561 lagg_capabilities(struct lagg_softc *sc)
  562 {
  563         struct lagg_port *lp;
  564         int cap, ena, pena;
  565         uint64_t hwa;
  566         struct ifnet_hw_tsomax hw_tsomax;
  567 
  568         LAGG_XLOCK_ASSERT(sc);
  569 
  570         /* Get common enabled capabilities for the lagg ports */
  571         ena = ~0;
  572         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
  573                 ena &= lp->lp_ifp->if_capenable;
  574         ena = (ena == ~0 ? 0 : ena);
  575 
  576         /*
  577          * Apply common enabled capabilities back to the lagg ports.
  578          * May require several iterations if they are dependent.
  579          */
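               /*
                * Editorial note: "ena" is monotonically non-increasing here;
                * both the &= below and a port rejecting a capability via
                * lagg_setcaps() can only clear bits, never set them, so the
                * loop is guaranteed to reach a fixed point and terminate.
                */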
  580         do {
  581                 pena = ena;
  582                 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
  583                         lagg_setcaps(lp, ena);
  584                         ena &= lp->lp_ifp->if_capenable;
  585                 }
  586         } while (pena != ena);
  587 
  588         /* Get other capabilities from the lagg ports */
  589         cap = ~0;
  590         hwa = ~(uint64_t)0;
  591         memset(&hw_tsomax, 0, sizeof(hw_tsomax));
  592         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
  593                 cap &= lp->lp_ifp->if_capabilities;
  594                 hwa &= lp->lp_ifp->if_hwassist;
  595                 if_hw_tsomax_common(lp->lp_ifp, &hw_tsomax);
  596         }
  597         cap = (cap == ~0 ? 0 : cap);
  598         hwa = (hwa == ~(uint64_t)0 ? 0 : hwa);
  599 
  600         if (sc->sc_ifp->if_capabilities != cap ||
  601             sc->sc_ifp->if_capenable != ena ||
  602             sc->sc_ifp->if_hwassist != hwa ||
  603             if_hw_tsomax_update(sc->sc_ifp, &hw_tsomax) != 0) {
  604                 sc->sc_ifp->if_capabilities = cap;
  605                 sc->sc_ifp->if_capenable = ena;
  606                 sc->sc_ifp->if_hwassist = hwa;
  607                 getmicrotime(&sc->sc_ifp->if_lastchange);
  608 
  609                 if (sc->sc_ifflags & IFF_DEBUG)
  610                         if_printf(sc->sc_ifp,
  611                             "capabilities 0x%08x enabled 0x%08x\n", cap, ena);
  612         }
  613 }
  614 
  615 static int
  616 lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
  617 {
  618         struct lagg_softc *sc_ptr;
  619         struct lagg_port *lp, *tlp;
  620         int error, i;
  621         uint64_t *pval;
  622 
  623         LAGG_XLOCK_ASSERT(sc);
  624 
  625         /* Limit the maximal number of lagg ports */
  626         if (sc->sc_count >= LAGG_MAX_PORTS)
  627                 return (ENOSPC);
  628 
  629         /* Check if port has already been associated to a lagg */
  630         if (ifp->if_lagg != NULL) {
  631                 /* Port is already in the current lagg? */
  632                 lp = (struct lagg_port *)ifp->if_lagg;
  633                 if (lp->lp_softc == sc)
  634                         return (EEXIST);
  635                 return (EBUSY);
  636         }
  637 
  638         /* XXX Disallow non-ethernet interfaces (this should be any of 802) */
  639         if (ifp->if_type != IFT_ETHER && ifp->if_type != IFT_L2VLAN)
  640                 return (EPROTONOSUPPORT);
  641 
  642         /* Allow the first Ethernet member to define the MTU */
  643         if (SLIST_EMPTY(&sc->sc_ports))
  644                 sc->sc_ifp->if_mtu = ifp->if_mtu;
  645         else if (sc->sc_ifp->if_mtu != ifp->if_mtu) {
  646                 if_printf(sc->sc_ifp, "invalid MTU for %s\n",
  647                     ifp->if_xname);
  648                 return (EINVAL);
  649         }
  650 
  651         lp = malloc(sizeof(struct lagg_port), M_DEVBUF, M_WAITOK|M_ZERO);
  652         lp->lp_softc = sc;
  653 
  654         /* Check if port is a stacked lagg */
  655         LAGG_LIST_LOCK();
  656         SLIST_FOREACH(sc_ptr, &V_lagg_list, sc_entries) {
  657                 if (ifp == sc_ptr->sc_ifp) {
  658                         LAGG_LIST_UNLOCK();
  659                         free(lp, M_DEVBUF);
  660                         return (EINVAL);
   661                         /* XXX disable stacking for the moment, it's untested */
  662 #ifdef LAGG_PORT_STACKING
  663                         lp->lp_flags |= LAGG_PORT_STACK;
  664                         if (lagg_port_checkstacking(sc_ptr) >=
  665                             LAGG_MAX_STACKING) {
  666                                 LAGG_LIST_UNLOCK();
  667                                 free(lp, M_DEVBUF);
  668                                 return (E2BIG);
  669                         }
  670 #endif
  671                 }
  672         }
  673         LAGG_LIST_UNLOCK();
  674 
  675         if_ref(ifp);
  676         lp->lp_ifp = ifp;
  677 
  678         bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ETHER_ADDR_LEN);
  679         lp->lp_ifcapenable = ifp->if_capenable;
  680         if (SLIST_EMPTY(&sc->sc_ports)) {
  681                 LAGG_WLOCK(sc);
  682                 bcopy(IF_LLADDR(ifp), IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
  683                 lagg_proto_lladdr(sc);
  684                 LAGG_WUNLOCK(sc);
  685                 EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
  686         } else {
  687                 if_setlladdr(ifp, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
  688         }
  689         lagg_setflags(lp, 1);
  690 
  691         LAGG_WLOCK(sc);
  692         if (SLIST_EMPTY(&sc->sc_ports))
  693                 sc->sc_primary = lp;
  694 
  695         /* Change the interface type */
  696         lp->lp_iftype = ifp->if_type;
  697         ifp->if_type = IFT_IEEE8023ADLAG;
  698         ifp->if_lagg = lp;
  699         lp->lp_ioctl = ifp->if_ioctl;
  700         ifp->if_ioctl = lagg_port_ioctl;
  701         lp->lp_output = ifp->if_output;
  702         ifp->if_output = lagg_port_output;
  703 
  704         /* Read port counters */
  705         pval = lp->port_counters.val;
  706         for (i = 0; i < IFCOUNTERS; i++, pval++)
  707                 *pval = ifp->if_get_counter(ifp, i);
  708 
   709         /*
   710          * Insert into the list of ports.
   711          * Keep ports sorted by if_index.  This keeps the configuration
   712          * predictable, so the same `ifconfig laggN create ...` command
   713          * leads to the same result each time.
   714          */
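               /*
                * Editorial note: the loop below stops at the last port whose
                * if_index is smaller than the new port's and whose successor,
                * if any, has a larger if_index; the new port is inserted after
                * it, or at the head of the list when no such port exists.
                */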
  715         SLIST_FOREACH(tlp, &sc->sc_ports, lp_entries) {
  716                 if (tlp->lp_ifp->if_index < ifp->if_index && (
  717                     SLIST_NEXT(tlp, lp_entries) == NULL ||
  718                     SLIST_NEXT(tlp, lp_entries)->lp_ifp->if_index >
  719                     ifp->if_index))
  720                         break;
  721         }
  722         if (tlp != NULL)
  723                 SLIST_INSERT_AFTER(tlp, lp, lp_entries);
  724         else
  725                 SLIST_INSERT_HEAD(&sc->sc_ports, lp, lp_entries);
  726         sc->sc_count++;
  727 
  728         lagg_setmulti(lp);
  729 
  730         if ((error = lagg_proto_addport(sc, lp)) != 0) {
  731                 /* Remove the port, without calling pr_delport. */
  732                 lagg_port_destroy(lp, 0);
  733                 LAGG_UNLOCK_ASSERT(sc);
  734                 return (error);
  735         }
  736 
  737         LAGG_WUNLOCK(sc);
  738 
  739         /* Update lagg capabilities */
  740         lagg_capabilities(sc);
  741         lagg_linkstate(sc);
  742 
  743         return (0);
  744 }
  745 
  746 #ifdef LAGG_PORT_STACKING
  747 static int
  748 lagg_port_checkstacking(struct lagg_softc *sc)
  749 {
  750         struct lagg_softc *sc_ptr;
  751         struct lagg_port *lp;
  752         int m = 0;
  753 
  754         LAGG_SXLOCK_ASSERT(sc);
  755         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
  756                 if (lp->lp_flags & LAGG_PORT_STACK) {
  757                         sc_ptr = (struct lagg_softc *)lp->lp_ifp->if_softc;
  758                         m = MAX(m, lagg_port_checkstacking(sc_ptr));
  759                 }
  760         }
  761 
  762         return (m + 1);
  763 }
  764 #endif
  765 
  766 static int
  767 lagg_port_destroy(struct lagg_port *lp, int rundelport)
  768 {
  769         struct lagg_softc *sc = lp->lp_softc;
  770         struct lagg_port *lp_ptr, *lp0;
  771         struct ifnet *ifp = lp->lp_ifp;
  772         uint64_t *pval, vdiff;
  773         int i;
  774 
  775         LAGG_XLOCK_ASSERT(sc);
  776 
  777         if (rundelport) {
  778                 LAGG_WLOCK(sc);
  779                 lagg_proto_delport(sc, lp);
  780         } else
  781                 LAGG_WLOCK_ASSERT(sc);
  782 
  783         if (lp->lp_detaching == 0)
  784                 lagg_clrmulti(lp);
  785 
  786         /* Restore interface */
  787         ifp->if_type = lp->lp_iftype;
  788         ifp->if_ioctl = lp->lp_ioctl;
  789         ifp->if_output = lp->lp_output;
  790         ifp->if_lagg = NULL;
  791 
  792         /* Update detached port counters */
  793         pval = lp->port_counters.val;
  794         for (i = 0; i < IFCOUNTERS; i++, pval++) {
  795                 vdiff = ifp->if_get_counter(ifp, i) - *pval;
  796                 sc->detached_counters.val[i] += vdiff;
  797         }
  798 
  799         /* Finally, remove the port from the lagg */
  800         SLIST_REMOVE(&sc->sc_ports, lp, lagg_port, lp_entries);
  801         sc->sc_count--;
  802 
  803         /* Update the primary interface */
  804         if (lp == sc->sc_primary) {
  805                 uint8_t lladdr[ETHER_ADDR_LEN];
  806 
  807                 if ((lp0 = SLIST_FIRST(&sc->sc_ports)) == NULL)
  808                         bzero(&lladdr, ETHER_ADDR_LEN);
  809                 else
  810                         bcopy(lp0->lp_lladdr, lladdr, ETHER_ADDR_LEN);
  811                 sc->sc_primary = lp0;
  812                 if (sc->sc_destroying == 0) {
  813                         bcopy(lladdr, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
  814                         lagg_proto_lladdr(sc);
  815                         LAGG_WUNLOCK(sc);
  816                         EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
  817                 } else
  818                         LAGG_WUNLOCK(sc);
  819 
  820                 /*
  821                  * Update lladdr for each port (new primary needs update
  822                  * as well, to switch from old lladdr to its 'real' one)
  823                  */
  824                 SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries)
  825                         if_setlladdr(lp_ptr->lp_ifp, lladdr, ETHER_ADDR_LEN);
  826         } else
  827                 LAGG_WUNLOCK(sc);
  828 
  829         if (lp->lp_ifflags)
  830                 if_printf(ifp, "%s: lp_ifflags unclean\n", __func__);
  831 
  832         if (lp->lp_detaching == 0) {
  833                 lagg_setflags(lp, 0);
  834                 lagg_setcaps(lp, lp->lp_ifcapenable);
  835                 if_setlladdr(ifp, lp->lp_lladdr, ETHER_ADDR_LEN);
  836         }
  837 
  838         if_rele(ifp);
  839         free(lp, M_DEVBUF);
  840 
  841         /* Update lagg capabilities */
  842         lagg_capabilities(sc);
  843         lagg_linkstate(sc);
  844 
  845         return (0);
  846 }
  847 
  848 static int
  849 lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
  850 {
  851         struct lagg_reqport *rp = (struct lagg_reqport *)data;
  852         struct lagg_softc *sc;
  853         struct lagg_port *lp = NULL;
  854         int error = 0;
  855 
  856         /* Should be checked by the caller */
  857         if (ifp->if_type != IFT_IEEE8023ADLAG ||
  858             (lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL)
  859                 goto fallback;
  860 
  861         switch (cmd) {
  862         case SIOCGLAGGPORT:
  863                 if (rp->rp_portname[0] == '\0' ||
  864                     ifunit(rp->rp_portname) != ifp) {
  865                         error = EINVAL;
  866                         break;
  867                 }
  868 
  869                 LAGG_SLOCK(sc);
  870                 if ((lp = ifp->if_lagg) == NULL || lp->lp_softc != sc) {
  871                         error = ENOENT;
  872                         LAGG_SUNLOCK(sc);
  873                         break;
  874                 }
  875 
  876                 lagg_port2req(lp, rp);
  877                 LAGG_SUNLOCK(sc);
  878                 break;
  879 
  880         case SIOCSIFCAP:
  881                 if (lp->lp_ioctl == NULL) {
  882                         error = EINVAL;
  883                         break;
  884                 }
  885                 error = (*lp->lp_ioctl)(ifp, cmd, data);
  886                 if (error)
  887                         break;
  888 
  889                 /* Update lagg interface capabilities */
  890                 LAGG_XLOCK(sc);
  891                 lagg_capabilities(sc);
  892                 LAGG_XUNLOCK(sc);
  893                 VLAN_CAPABILITIES(sc->sc_ifp);
  894                 break;
  895 
  896         case SIOCSIFMTU:
  897                 /* Do not allow the MTU to be changed once joined */
  898                 error = EINVAL;
  899                 break;
  900 
  901         default:
  902                 goto fallback;
  903         }
  904 
  905         return (error);
  906 
  907 fallback:
  908         if (lp != NULL && lp->lp_ioctl != NULL)
  909                 return ((*lp->lp_ioctl)(ifp, cmd, data));
  910 
  911         return (EINVAL);
  912 }
  913 
   914 /*
   915  * Requests counter @cnt data.
   916  *
   917  * The counter value is calculated the following way:
   918  * 1) for each port, sum the difference between current and "initial" measurements.
   919  * 2) add the lagg logical interface counters.
   920  * 3) add the data from the detached_counters array.
   921  *
   922  * We also do the following things on port attach/detach:
   923  * 1) On port attach we store all counters it has into the port_counters array.
   924  * 2) On port detach we add the difference between the "initial" and
   925  *    current counter data to the detached_counters array.
   926  */
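       /*
        * Editorial worked example (illustrative numbers): a member port that
        * reported 1000 input packets when it was attached and reports 1500
        * now contributes 500.  A former member that gained 200 input packets
        * during its membership left 200 behind in detached_counters when it
        * was removed.  If nothing was counted on the logical interface
        * itself, lagg_get_counter(ifp, IFCOUNTER_IPACKETS) returns 700.
        */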
  927 static uint64_t
  928 lagg_get_counter(struct ifnet *ifp, ift_counter cnt)
  929 {
  930         struct lagg_softc *sc;
  931         struct lagg_port *lp;
  932         struct ifnet *lpifp;
  933         struct rm_priotracker tracker;
  934         uint64_t newval, oldval, vsum;
  935 
  936         /* Revise this when we've got non-generic counters. */
  937         KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
  938 
  939         sc = (struct lagg_softc *)ifp->if_softc;
  940         LAGG_RLOCK(sc, &tracker);
  941 
  942         vsum = 0;
  943         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
   944                 /* Value saved when the port was attached */
   945                 oldval = lp->port_counters.val[cnt];
   946                 /* Current value */
  947                 lpifp = lp->lp_ifp;
  948                 newval = lpifp->if_get_counter(lpifp, cnt);
   949                 /* Calculate the diff and add it to the sum */
  950                 vsum += newval - oldval;
  951         }
  952 
   953         /*
   954          * Add counter data which might be added by upper
   955          * layer protocols operating on the logical interface.
   956          */
  957         vsum += if_get_counter_default(ifp, cnt);
  958 
   959         /*
   960          * Add the counter data from the detached ports' counters.
   961          */
  962         vsum += sc->detached_counters.val[cnt];
  963 
  964         LAGG_RUNLOCK(sc, &tracker);
  965 
  966         return (vsum);
  967 }
  968 
  969 /*
  970  * For direct output to child ports.
  971  */
  972 static int
  973 lagg_port_output(struct ifnet *ifp, struct mbuf *m,
  974         const struct sockaddr *dst, struct route *ro)
  975 {
  976         struct lagg_port *lp = ifp->if_lagg;
  977 
  978         switch (dst->sa_family) {
  979                 case pseudo_AF_HDRCMPLT:
  980                 case AF_UNSPEC:
  981                         return ((*lp->lp_output)(ifp, m, dst, ro));
  982         }
  983 
  984         /* drop any other frames */
  985         m_freem(m);
  986         return (ENETDOWN);
  987 }
  988 
  989 static void
  990 lagg_port_ifdetach(void *arg __unused, struct ifnet *ifp)
  991 {
  992         struct lagg_port *lp;
  993         struct lagg_softc *sc;
  994 
  995         if ((lp = ifp->if_lagg) == NULL)
  996                 return;
  997         /* If the ifnet is just being renamed, don't do anything. */
  998         if (ifp->if_flags & IFF_RENAMING)
  999                 return;
 1000 
 1001         sc = lp->lp_softc;
 1002 
 1003         LAGG_XLOCK(sc);
 1004         lp->lp_detaching = 1;
 1005         lagg_port_destroy(lp, 1);
 1006         LAGG_XUNLOCK(sc);
 1007         VLAN_CAPABILITIES(sc->sc_ifp);
 1008 }
 1009 
 1010 static void
 1011 lagg_port2req(struct lagg_port *lp, struct lagg_reqport *rp)
 1012 {
 1013         struct lagg_softc *sc = lp->lp_softc;
 1014 
 1015         strlcpy(rp->rp_ifname, sc->sc_ifname, sizeof(rp->rp_ifname));
 1016         strlcpy(rp->rp_portname, lp->lp_ifp->if_xname, sizeof(rp->rp_portname));
 1017         rp->rp_prio = lp->lp_prio;
 1018         rp->rp_flags = lp->lp_flags;
 1019         lagg_proto_portreq(sc, lp, &rp->rp_psc);
 1020 
 1021         /* Add protocol specific flags */
 1022         switch (sc->sc_proto) {
 1023                 case LAGG_PROTO_FAILOVER:
 1024                         if (lp == sc->sc_primary)
 1025                                 rp->rp_flags |= LAGG_PORT_MASTER;
 1026                         if (lp == lagg_link_active(sc, sc->sc_primary))
 1027                                 rp->rp_flags |= LAGG_PORT_ACTIVE;
 1028                         break;
 1029 
 1030                 case LAGG_PROTO_ROUNDROBIN:
 1031                 case LAGG_PROTO_LOADBALANCE:
 1032                 case LAGG_PROTO_BROADCAST:
 1033                         if (LAGG_PORTACTIVE(lp))
 1034                                 rp->rp_flags |= LAGG_PORT_ACTIVE;
 1035                         break;
 1036 
 1037                 case LAGG_PROTO_LACP:
 1038                         /* LACP has a different definition of active */
 1039                         if (lacp_isactive(lp))
 1040                                 rp->rp_flags |= LAGG_PORT_ACTIVE;
 1041                         if (lacp_iscollecting(lp))
 1042                                 rp->rp_flags |= LAGG_PORT_COLLECTING;
 1043                         if (lacp_isdistributing(lp))
 1044                                 rp->rp_flags |= LAGG_PORT_DISTRIBUTING;
 1045                         break;
 1046         }
 1047 
 1048 }
 1049 
 1050 static void
 1051 lagg_init(void *xsc)
 1052 {
 1053         struct lagg_softc *sc = (struct lagg_softc *)xsc;
 1054         struct ifnet *ifp = sc->sc_ifp;
 1055         struct lagg_port *lp;
 1056 
 1057         LAGG_XLOCK(sc);
 1058         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 1059                 LAGG_XUNLOCK(sc);
 1060                 return;
 1061         }
 1062 
 1063         ifp->if_drv_flags |= IFF_DRV_RUNNING;
 1064 
  1065         /*
  1066          * Update the port lladdrs if needed.
  1067          * This might be an if_setlladdr() notification
  1068          * that the lladdr has been changed.
  1069          */
 1070         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1071                 if (memcmp(IF_LLADDR(ifp), IF_LLADDR(lp->lp_ifp),
 1072                     ETHER_ADDR_LEN) != 0)
 1073                         if_setlladdr(lp->lp_ifp, IF_LLADDR(ifp), ETHER_ADDR_LEN);
 1074         }
 1075 
 1076         lagg_proto_init(sc);
 1077 
 1078         LAGG_XUNLOCK(sc);
 1079 }
 1080 
 1081 static void
 1082 lagg_stop(struct lagg_softc *sc)
 1083 {
 1084         struct ifnet *ifp = sc->sc_ifp;
 1085 
 1086         LAGG_XLOCK_ASSERT(sc);
 1087 
 1088         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 1089                 return;
 1090 
 1091         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 1092 
 1093         lagg_proto_stop(sc);
 1094 }
 1095 
 1096 static int
 1097 lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 1098 {
 1099         struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 1100         struct lagg_reqall *ra = (struct lagg_reqall *)data;
 1101         struct lagg_reqopts *ro = (struct lagg_reqopts *)data;
 1102         struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf;
 1103         struct lagg_reqflags *rf = (struct lagg_reqflags *)data;
 1104         struct ifreq *ifr = (struct ifreq *)data;
 1105         struct lagg_port *lp;
 1106         struct ifnet *tpif;
 1107         struct thread *td = curthread;
 1108         char *buf, *outbuf;
 1109         int count, buflen, len, error = 0;
 1110 
 1111         bzero(&rpbuf, sizeof(rpbuf));
 1112 
 1113         switch (cmd) {
 1114         case SIOCGLAGG:
 1115                 LAGG_SLOCK(sc);
 1116                 buflen = sc->sc_count * sizeof(struct lagg_reqport);
 1117                 outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
 1118                 ra->ra_proto = sc->sc_proto;
 1119                 lagg_proto_request(sc, &ra->ra_psc);
 1120                 count = 0;
 1121                 buf = outbuf;
 1122                 len = min(ra->ra_size, buflen);
 1123                 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1124                         if (len < sizeof(rpbuf))
 1125                                 break;
 1126 
 1127                         lagg_port2req(lp, &rpbuf);
 1128                         memcpy(buf, &rpbuf, sizeof(rpbuf));
 1129                         count++;
 1130                         buf += sizeof(rpbuf);
 1131                         len -= sizeof(rpbuf);
 1132                 }
 1133                 LAGG_SUNLOCK(sc);
 1134                 ra->ra_ports = count;
 1135                 ra->ra_size = count * sizeof(rpbuf);
 1136                 error = copyout(outbuf, ra->ra_port, ra->ra_size);
 1137                 free(outbuf, M_TEMP);
 1138                 break;
 1139         case SIOCSLAGG:
 1140                 error = priv_check(td, PRIV_NET_LAGG);
 1141                 if (error)
 1142                         break;
 1143                 if (ra->ra_proto >= LAGG_PROTO_MAX) {
 1144                         error = EPROTONOSUPPORT;
 1145                         break;
 1146                 }
 1147 
 1148                 LAGG_XLOCK(sc);
 1149                 LAGG_WLOCK(sc);
 1150                 lagg_proto_detach(sc);
 1151                 LAGG_UNLOCK_ASSERT(sc);
 1152                 lagg_proto_attach(sc, ra->ra_proto);
 1153                 LAGG_XUNLOCK(sc);
 1154                 break;
 1155         case SIOCGLAGGOPTS:
 1156                 LAGG_SLOCK(sc);
 1157                 ro->ro_opts = sc->sc_opts;
 1158                 if (sc->sc_proto == LAGG_PROTO_LACP) {
 1159                         struct lacp_softc *lsc;
 1160 
 1161                         lsc = (struct lacp_softc *)sc->sc_psc;
 1162                         if (lsc->lsc_debug.lsc_tx_test != 0)
 1163                                 ro->ro_opts |= LAGG_OPT_LACP_TXTEST;
 1164                         if (lsc->lsc_debug.lsc_rx_test != 0)
 1165                                 ro->ro_opts |= LAGG_OPT_LACP_RXTEST;
 1166                         if (lsc->lsc_strict_mode != 0)
 1167                                 ro->ro_opts |= LAGG_OPT_LACP_STRICT;
 1168                         if (lsc->lsc_fast_timeout != 0)
 1169                                 ro->ro_opts |= LAGG_OPT_LACP_TIMEOUT;
 1170 
 1171                         ro->ro_active = sc->sc_active;
 1172                 } else {
 1173                         ro->ro_active = 0;
 1174                         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 1175                                 ro->ro_active += LAGG_PORTACTIVE(lp);
 1176                 }
 1177                 ro->ro_bkt = sc->sc_bkt;
 1178                 ro->ro_flapping = sc->sc_flapping;
 1179                 ro->ro_flowid_shift = sc->flowid_shift;
 1180                 LAGG_SUNLOCK(sc);
 1181                 break;
  1182         case SIOCSLAGGOPTS:
  1183                 error = priv_check(td, PRIV_NET_LAGG);
  1184                 if (error)
  1185                         break;
  1186                 if (sc->sc_proto == LAGG_PROTO_ROUNDROBIN) {
  1187                         if (ro->ro_bkt == 0)
  1188                                 sc->sc_bkt = 1; /* Minimum 1 packet per iface. */
  1189                         else
  1190                                 sc->sc_bkt = ro->ro_bkt;
  1191                 }
  1192                 if (ro->ro_opts == 0)
  1193                         break;
 1194                 /*
 1195                  * Set options.  LACP options are stored in sc->sc_psc,
 1196                  * not in sc_opts.
 1197                  */
 1198                 int valid, lacp;
 1199 
 1200                 switch (ro->ro_opts) {
 1201                 case LAGG_OPT_USE_FLOWID:
 1202                 case -LAGG_OPT_USE_FLOWID:
 1203                 case LAGG_OPT_FLOWIDSHIFT:
 1204                         valid = 1;
 1205                         lacp = 0;
 1206                         break;
 1207                 case LAGG_OPT_LACP_TXTEST:
 1208                 case -LAGG_OPT_LACP_TXTEST:
 1209                 case LAGG_OPT_LACP_RXTEST:
 1210                 case -LAGG_OPT_LACP_RXTEST:
 1211                 case LAGG_OPT_LACP_STRICT:
 1212                 case -LAGG_OPT_LACP_STRICT:
 1213                 case LAGG_OPT_LACP_TIMEOUT:
 1214                 case -LAGG_OPT_LACP_TIMEOUT:
 1215                         valid = lacp = 1;
 1216                         break;
 1217                 default:
 1218                         valid = lacp = 0;
 1219                         break;
 1220                 }
 1221 
 1222                 LAGG_XLOCK(sc);
 1223 
 1224                 if (valid == 0 ||
 1225                     (lacp == 1 && sc->sc_proto != LAGG_PROTO_LACP)) {
 1226                         /* Invalid combination of options specified. */
 1227                         error = EINVAL;
 1228                         LAGG_XUNLOCK(sc);
 1229                         break;  /* Return from SIOCSLAGGOPTS. */ 
 1230                 }
 1231                 /*
 1232                  * Store new options into sc->sc_opts except for
 1233                  * FLOWIDSHIFT and LACP options.
 1234                  */
 1235                 if (lacp == 0) {
 1236                         if (ro->ro_opts == LAGG_OPT_FLOWIDSHIFT)
 1237                                 sc->flowid_shift = ro->ro_flowid_shift;
 1238                         else if (ro->ro_opts > 0)
 1239                                 sc->sc_opts |= ro->ro_opts;
 1240                         else
 1241                                 sc->sc_opts &= ~ro->ro_opts;
 1242                 } else {
 1243                         struct lacp_softc *lsc;
 1244                         struct lacp_port *lp;
 1245 
 1246                         lsc = (struct lacp_softc *)sc->sc_psc;
 1247 
 1248                         switch (ro->ro_opts) {
 1249                         case LAGG_OPT_LACP_TXTEST:
 1250                                 lsc->lsc_debug.lsc_tx_test = 1;
 1251                                 break;
 1252                         case -LAGG_OPT_LACP_TXTEST:
 1253                                 lsc->lsc_debug.lsc_tx_test = 0;
 1254                                 break;
 1255                         case LAGG_OPT_LACP_RXTEST:
 1256                                 lsc->lsc_debug.lsc_rx_test = 1;
 1257                                 break;
 1258                         case -LAGG_OPT_LACP_RXTEST:
 1259                                 lsc->lsc_debug.lsc_rx_test = 0;
 1260                                 break;
 1261                         case LAGG_OPT_LACP_STRICT:
 1262                                 lsc->lsc_strict_mode = 1;
 1263                                 break;
 1264                         case -LAGG_OPT_LACP_STRICT:
 1265                                 lsc->lsc_strict_mode = 0;
 1266                                 break;
 1267                         case LAGG_OPT_LACP_TIMEOUT:
 1268                                 LACP_LOCK(lsc);
 1269                                 LIST_FOREACH(lp, &lsc->lsc_ports, lp_next)
 1270                                         lp->lp_state |= LACP_STATE_TIMEOUT;
 1271                                 LACP_UNLOCK(lsc);
 1272                                 lsc->lsc_fast_timeout = 1;
 1273                                 break;
 1274                         case -LAGG_OPT_LACP_TIMEOUT:
 1275                                 LACP_LOCK(lsc);
 1276                                 LIST_FOREACH(lp, &lsc->lsc_ports, lp_next)
 1277                                         lp->lp_state &= ~LACP_STATE_TIMEOUT;
 1278                                 LACP_UNLOCK(lsc);
 1279                                 lsc->lsc_fast_timeout = 0;
 1280                                 break;
 1281                         }
 1282                 }
 1283                 LAGG_XUNLOCK(sc);
 1284                 break;
 1285         case SIOCGLAGGFLAGS:
 1286                 rf->rf_flags = 0;
 1287                 LAGG_SLOCK(sc);
 1288                 if (sc->sc_flags & MBUF_HASHFLAG_L2)
 1289                         rf->rf_flags |= LAGG_F_HASHL2;
 1290                 if (sc->sc_flags & MBUF_HASHFLAG_L3)
 1291                         rf->rf_flags |= LAGG_F_HASHL3;
 1292                 if (sc->sc_flags & MBUF_HASHFLAG_L4)
 1293                         rf->rf_flags |= LAGG_F_HASHL4;
 1294                 LAGG_SUNLOCK(sc);
 1295                 break;
 1296         case SIOCSLAGGHASH:
 1297                 error = priv_check(td, PRIV_NET_LAGG);
 1298                 if (error)
 1299                         break;
 1300                 if ((rf->rf_flags & LAGG_F_HASHMASK) == 0) {
 1301                         error = EINVAL;
 1302                         break;
 1303                 }
 1304                 LAGG_XLOCK(sc);
 1305                 sc->sc_flags = 0;
 1306                 if (rf->rf_flags & LAGG_F_HASHL2)
 1307                         sc->sc_flags |= MBUF_HASHFLAG_L2;
 1308                 if (rf->rf_flags & LAGG_F_HASHL3)
 1309                         sc->sc_flags |= MBUF_HASHFLAG_L3;
 1310                 if (rf->rf_flags & LAGG_F_HASHL4)
 1311                         sc->sc_flags |= MBUF_HASHFLAG_L4;
 1312                 LAGG_XUNLOCK(sc);
 1313                 break;
 1314         case SIOCGLAGGPORT:
 1315                 if (rp->rp_portname[0] == '\0' ||
 1316                     (tpif = ifunit_ref(rp->rp_portname)) == NULL) {
 1317                         error = EINVAL;
 1318                         break;
 1319                 }
 1320 
 1321                 LAGG_SLOCK(sc);
 1322                 if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
 1323                     lp->lp_softc != sc) {
 1324                         error = ENOENT;
 1325                         LAGG_SUNLOCK(sc);
 1326                         if_rele(tpif);
 1327                         break;
 1328                 }
 1329 
 1330                 lagg_port2req(lp, rp);
 1331                 LAGG_SUNLOCK(sc);
 1332                 if_rele(tpif);
 1333                 break;
 1334         case SIOCSLAGGPORT:
 1335                 error = priv_check(td, PRIV_NET_LAGG);
 1336                 if (error)
 1337                         break;
 1338                 if (rp->rp_portname[0] == '\0' ||
 1339                     (tpif = ifunit_ref(rp->rp_portname)) == NULL) {
 1340                         error = EINVAL;
 1341                         break;
 1342                 }
 1343 #ifdef INET6
  1344                 /*
  1345                  * A laggport interface should not have an inet6 address
  1346                  * because two interfaces with a valid link-local
  1347                  * scope zone must not be merged in any form.  This
  1348                  * restriction is needed to prevent a violation of the
  1349                  * link-local scope zone.  An attempt to add a laggport
  1350                  * interface which has inet6 addresses triggers the
  1351                  * removal of all inet6 addresses on the member
  1352                  * interface.
  1353                  */
 1354                 if (in6ifa_llaonifp(tpif)) {
 1355                         in6_ifdetach(tpif);
  1356                         if_printf(sc->sc_ifp,
  1357                             "IPv6 addresses on %s have been removed "
  1358                             "before adding it as a member to prevent "
  1359                             "IPv6 address scope violation.\n",
  1360                             tpif->if_xname);
 1361                 }
 1362 #endif
 1363                 LAGG_XLOCK(sc);
 1364                 error = lagg_port_create(sc, tpif);
 1365                 LAGG_XUNLOCK(sc);
 1366                 if_rele(tpif);
 1367                 VLAN_CAPABILITIES(ifp);
 1368                 break;
 1369         case SIOCSLAGGDELPORT:
 1370                 error = priv_check(td, PRIV_NET_LAGG);
 1371                 if (error)
 1372                         break;
 1373                 if (rp->rp_portname[0] == '\0' ||
 1374                     (tpif = ifunit_ref(rp->rp_portname)) == NULL) {
 1375                         error = EINVAL;
 1376                         break;
 1377                 }
 1378 
 1379                 LAGG_XLOCK(sc);
 1380                 if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
 1381                     lp->lp_softc != sc) {
 1382                         error = ENOENT;
 1383                         LAGG_XUNLOCK(sc);
 1384                         if_rele(tpif);
 1385                         break;
 1386                 }
 1387 
 1388                 error = lagg_port_destroy(lp, 1);
 1389                 LAGG_XUNLOCK(sc);
 1390                 if_rele(tpif);
 1391                 VLAN_CAPABILITIES(ifp);
 1392                 break;
 1393         case SIOCSIFFLAGS:
 1394                 /* Set flags on ports too */
 1395                 LAGG_XLOCK(sc);
 1396                 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1397                         lagg_setflags(lp, 1);
 1398                 }
 1399 
 1400                 if (!(ifp->if_flags & IFF_UP) &&
 1401                     (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 1402                         /*
 1403                          * If interface is marked down and it is running,
 1404                          * then stop and disable it.
 1405                          */
 1406                         lagg_stop(sc);
 1407                         LAGG_XUNLOCK(sc);
 1408                 } else if ((ifp->if_flags & IFF_UP) &&
 1409                     !(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 1410                         /*
 1411                          * If interface is marked up and it is stopped, then
 1412                          * start it.
 1413                          */
 1414                         LAGG_XUNLOCK(sc);
 1415                         (*ifp->if_init)(sc);
 1416                 } else
 1417                         LAGG_XUNLOCK(sc);
 1418                 break;
 1419         case SIOCADDMULTI:
 1420         case SIOCDELMULTI:
 1421                 LAGG_WLOCK(sc);
 1422                 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1423                         lagg_clrmulti(lp);
 1424                         lagg_setmulti(lp);
 1425                 }
 1426                 LAGG_WUNLOCK(sc);
 1427                 error = 0;
 1428                 break;
 1429         case SIOCSIFMEDIA:
 1430         case SIOCGIFMEDIA:
 1431                 error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
 1432                 break;
 1433 
 1434         case SIOCSIFCAP:
 1435                 LAGG_XLOCK(sc);
 1436                 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1437                         if (lp->lp_ioctl != NULL)
 1438                                 (*lp->lp_ioctl)(lp->lp_ifp, cmd, data);
 1439                 }
 1440                 lagg_capabilities(sc);
 1441                 LAGG_XUNLOCK(sc);
 1442                 VLAN_CAPABILITIES(ifp);
 1443                 error = 0;
 1444                 break;
 1445 
 1446         case SIOCSIFMTU:
 1447                 /* Do not allow the MTU to be directly changed */
 1448                 error = EINVAL;
 1449                 break;
 1450 
 1451         default:
 1452                 error = ether_ioctl(ifp, cmd, data);
 1453                 break;
 1454         }
 1455         return (error);
 1456 }
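/*
 * A minimal userland sketch of exercising the SIOCSLAGGPORT handler
 * above, modeled on what ifconfig(8) does; the helper name and the
 * interface names in the usage note are placeholders, and error
 * handling is reduced to err(3).  This is an illustration only, not
 * part of the driver.
 *
 *	#include <sys/types.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/socket.h>
 *	#include <net/if.h>
 *	#include <net/if_lagg.h>
 *	#include <err.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static void
 *	add_lagg_port(const char *laggname, const char *portname)
 *	{
 *		struct lagg_reqport rp;
 *		int s;
 *
 *		if ((s = socket(AF_LOCAL, SOCK_DGRAM, 0)) == -1)
 *			err(1, "socket");
 *		memset(&rp, 0, sizeof(rp));
 *		strlcpy(rp.rp_ifname, laggname, sizeof(rp.rp_ifname));
 *		strlcpy(rp.rp_portname, portname, sizeof(rp.rp_portname));
 *		if (ioctl(s, SIOCSLAGGPORT, &rp) == -1)
 *			err(1, "SIOCSLAGGPORT");
 *		close(s);
 *	}
 *
 * Usage would be add_lagg_port("lagg0", "em0"); removing a port is the
 * same call with SIOCSLAGGDELPORT.
 */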
 1457 
 1458 static int
 1459 lagg_setmulti(struct lagg_port *lp)
 1460 {
 1461         struct lagg_softc *sc = lp->lp_softc;
 1462         struct ifnet *ifp = lp->lp_ifp;
 1463         struct ifnet *scifp = sc->sc_ifp;
 1464         struct lagg_mc *mc;
 1465         struct ifmultiaddr *ifma;
 1466         int error;
 1467 
 1468         LAGG_WLOCK_ASSERT(sc);
 1469         IF_ADDR_WLOCK(scifp);
 1470         TAILQ_FOREACH(ifma, &scifp->if_multiaddrs, ifma_link) {
 1471                 if (ifma->ifma_addr->sa_family != AF_LINK)
 1472                         continue;
 1473                 mc = malloc(sizeof(struct lagg_mc), M_DEVBUF, M_NOWAIT);
 1474                 if (mc == NULL) {
 1475                         IF_ADDR_WUNLOCK(scifp);
 1476                         return (ENOMEM);
 1477                 }
 1478                 bcopy(ifma->ifma_addr, &mc->mc_addr,
 1479                     ifma->ifma_addr->sa_len);
 1480                 mc->mc_addr.sdl_index = ifp->if_index;
 1481                 mc->mc_ifma = NULL;
 1482                 SLIST_INSERT_HEAD(&lp->lp_mc_head, mc, mc_entries);
 1483         }
 1484         IF_ADDR_WUNLOCK(scifp);
 1485         SLIST_FOREACH (mc, &lp->lp_mc_head, mc_entries) {
 1486                 error = if_addmulti(ifp,
 1487                     (struct sockaddr *)&mc->mc_addr, &mc->mc_ifma);
 1488                 if (error)
 1489                         return (error);
 1490         }
 1491         return (0);
 1492 }
 1493 
 1494 static int
 1495 lagg_clrmulti(struct lagg_port *lp)
 1496 {
 1497         struct lagg_mc *mc;
 1498 
 1499         LAGG_WLOCK_ASSERT(lp->lp_softc);
 1500         while ((mc = SLIST_FIRST(&lp->lp_mc_head)) != NULL) {
 1501                 SLIST_REMOVE(&lp->lp_mc_head, mc, lagg_mc, mc_entries);
 1502                 if (mc->mc_ifma && lp->lp_detaching == 0)
 1503                         if_delmulti_ifma(mc->mc_ifma);
 1504                 free(mc, M_DEVBUF);
 1505         }
 1506         return (0);
 1507 }
 1508 
 1509 static int
 1510 lagg_setcaps(struct lagg_port *lp, int cap)
 1511 {
 1512         struct ifreq ifr;
 1513 
 1514         if (lp->lp_ifp->if_capenable == cap)
 1515                 return (0);
 1516         if (lp->lp_ioctl == NULL)
 1517                 return (ENXIO);
 1518         ifr.ifr_reqcap = cap;
 1519         return ((*lp->lp_ioctl)(lp->lp_ifp, SIOCSIFCAP, (caddr_t)&ifr));
 1520 }
 1521 
 1522 /* Handle a ref counted flag that should be set on the lagg port as well */
 1523 static int
 1524 lagg_setflag(struct lagg_port *lp, int flag, int status,
 1525     int (*func)(struct ifnet *, int))
 1526 {
 1527         struct lagg_softc *sc = lp->lp_softc;
 1528         struct ifnet *scifp = sc->sc_ifp;
 1529         struct ifnet *ifp = lp->lp_ifp;
 1530         int error;
 1531 
 1532         LAGG_XLOCK_ASSERT(sc);
 1533 
 1534         status = status ? (scifp->if_flags & flag) : 0;
 1535         /* Now "status" contains the flag value or 0 */
 1536 
 1537         /*
 1538          * See if the recorded port status differs from what we
 1539          * want it to be.  If it does, flip it.  We record the
 1540          * port's status in lp_ifflags so that we never clear a
 1541          * port flag we did not set.  In fact, we do not clear or
 1542          * set port flags directly; we acquire or release
 1543          * references to them instead.  That is why the recorded
 1544          * flags are guaranteed to stay in accord with the actual
 1545          * port flags.
 1546          */
 1546         if (status != (lp->lp_ifflags & flag)) {
 1547                 error = (*func)(ifp, status);
 1548                 if (error)
 1549                         return (error);
 1550                 lp->lp_ifflags &= ~flag;
 1551                 lp->lp_ifflags |= status;
 1552         }
 1553         return (0);
 1554 }
 1555 
 1556 /*
 1557  * Handle IFF_* flags that require certain changes on the lagg port:
 1558  * if "status" is true, update the port's flags to match the lagg's;
 1559  * if "status" is false, forcibly clear those flags on the port.
 1560  */
 1561 static int
 1562 lagg_setflags(struct lagg_port *lp, int status)
 1563 {
 1564         int error, i;
 1565 
 1566         for (i = 0; lagg_pflags[i].flag; i++) {
 1567                 error = lagg_setflag(lp, lagg_pflags[i].flag,
 1568                     status, lagg_pflags[i].func);
 1569                 if (error)
 1570                         return (error);
 1571         }
 1572         return (0);
 1573 }
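/*
 * For illustration, a sketch of the shape of the lagg_pflags table
 * that lagg_setflags() walks (the table itself is defined near the
 * top of this file, after the "Special flags we should propagate to
 * the lagg ports" comment; the entries shown here are assumptions,
 * not a copy of it).  Each row pairs an IFF_* flag with the
 * ref-counted helper that toggles it on a port:
 *
 *	static const struct {
 *		int	flag;
 *		int	(*func)(struct ifnet *, int);
 *	} example_pflags[] = {
 *		{ IFF_PROMISC,	ifpromisc },
 *		{ IFF_ALLMULTI,	if_allmulti },
 *		{ 0, NULL }
 *	};
 *
 * With such a table, lagg_setflags(lp, 1) mirrors the lagg's
 * promiscuous and allmulti state onto the port, and
 * lagg_setflags(lp, 0) releases those references again.
 */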
 1574 
 1575 static int
 1576 lagg_transmit(struct ifnet *ifp, struct mbuf *m)
 1577 {
 1578         struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 1579         int error, len, mcast;
 1580         struct rm_priotracker tracker;
 1581 
 1582         len = m->m_pkthdr.len;
 1583         mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
 1584 
 1585         LAGG_RLOCK(sc, &tracker);
 1586         /* We need a Tx algorithm and at least one port */
 1587         if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
 1588                 LAGG_RUNLOCK(sc, &tracker);
 1589                 m_freem(m);
 1590                 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 1591                 return (ENXIO);
 1592         }
 1593 
 1594         ETHER_BPF_MTAP(ifp, m);
 1595 
 1596         error = lagg_proto_start(sc, m);
 1597         LAGG_RUNLOCK(sc, &tracker);
 1598 
 1599         if (error != 0)
 1600                 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 1601 
 1602         return (error);
 1603 }
 1604 
 1605 /*
 1606  * The ifp->if_qflush entry point for lagg(4) is a no-op.
 1607  */
 1608 static void
 1609 lagg_qflush(struct ifnet *ifp __unused)
 1610 {
 1611 }
 1612 
 1613 static struct mbuf *
 1614 lagg_input(struct ifnet *ifp, struct mbuf *m)
 1615 {
 1616         struct lagg_port *lp = ifp->if_lagg;
 1617         struct lagg_softc *sc = lp->lp_softc;
 1618         struct ifnet *scifp = sc->sc_ifp;
 1619         struct rm_priotracker tracker;
 1620 
 1621         LAGG_RLOCK(sc, &tracker);
 1622         if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
 1623             (lp->lp_flags & LAGG_PORT_DISABLED) ||
 1624             sc->sc_proto == LAGG_PROTO_NONE) {
 1625                 LAGG_RUNLOCK(sc, &tracker);
 1626                 m_freem(m);
 1627                 return (NULL);
 1628         }
 1629 
 1630         ETHER_BPF_MTAP(scifp, m);
 1631 
 1632         if (lp->lp_detaching != 0) {
 1633                 m_freem(m);
 1634                 m = NULL;
 1635         } else
 1636                 m = lagg_proto_input(sc, lp, m);
 1637 
 1638         if (m != NULL) {
 1639                 if (scifp->if_flags & IFF_MONITOR) {
 1640                         m_freem(m);
 1641                         m = NULL;
 1642                 }
 1643         }
 1644 
 1645         LAGG_RUNLOCK(sc, &tracker);
 1646         return (m);
 1647 }
 1648 
 1649 static int
 1650 lagg_media_change(struct ifnet *ifp)
 1651 {
 1652         struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 1653 
 1654         if (sc->sc_ifflags & IFF_DEBUG)
 1655                 printf("%s\n", __func__);
 1656 
 1657         /* Ignore */
 1658         return (0);
 1659 }
 1660 
 1661 static void
 1662 lagg_media_status(struct ifnet *ifp, struct ifmediareq *imr)
 1663 {
 1664         struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 1665         struct lagg_port *lp;
 1666 
 1667         imr->ifm_status = IFM_AVALID;
 1668         imr->ifm_active = IFM_ETHER | IFM_AUTO;
 1669 
 1670         LAGG_SLOCK(sc);
 1671         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1672                 if (LAGG_PORTACTIVE(lp))
 1673                         imr->ifm_status |= IFM_ACTIVE;
 1674         }
 1675         LAGG_SUNLOCK(sc);
 1676 }
 1677 
 1678 static void
 1679 lagg_linkstate(struct lagg_softc *sc)
 1680 {
 1681         struct lagg_port *lp;
 1682         int new_link = LINK_STATE_DOWN;
 1683         uint64_t speed;
 1684 
 1685         LAGG_XLOCK_ASSERT(sc);
 1686 
 1687         /* Our link is considered up if at least one of our ports is active */
 1688         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1689                 if (lp->lp_ifp->if_link_state == LINK_STATE_UP) {
 1690                         new_link = LINK_STATE_UP;
 1691                         break;
 1692                 }
 1693         }
 1694         if_link_state_change(sc->sc_ifp, new_link);
 1695 
 1696         /* Update if_baudrate to reflect the max possible speed */
 1697         switch (sc->sc_proto) {
 1698                 case LAGG_PROTO_FAILOVER:
 1699                         sc->sc_ifp->if_baudrate = sc->sc_primary != NULL ?
 1700                             sc->sc_primary->lp_ifp->if_baudrate : 0;
 1701                         break;
 1702                 case LAGG_PROTO_ROUNDROBIN:
 1703                 case LAGG_PROTO_LOADBALANCE:
 1704                 case LAGG_PROTO_BROADCAST:
 1705                         speed = 0;
 1706                         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 1707                                 speed += lp->lp_ifp->if_baudrate;
 1708                         sc->sc_ifp->if_baudrate = speed;
 1709                         break;
 1710                 case LAGG_PROTO_LACP:
 1711                         /* LACP updates if_baudrate itself */
 1712                         break;
 1713         }
 1714 }
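/*
 * A worked example of the baudrate update above: a lagg of two 1 Gb/s
 * ports reports if_baudrate = 2000000000 under ROUNDROBIN, LOADBALANCE,
 * or BROADCAST (the per-port rates are summed), but 1000000000 under
 * FAILOVER, where only the primary carries traffic at any given time.
 */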
 1715 
 1716 static void
 1717 lagg_port_state(struct ifnet *ifp, int state)
 1718 {
 1719         struct lagg_port *lp = (struct lagg_port *)ifp->if_lagg;
 1720         struct lagg_softc *sc = NULL;
 1721 
 1722         if (lp != NULL)
 1723                 sc = lp->lp_softc;
 1724         if (sc == NULL)
 1725                 return;
 1726 
 1727         LAGG_XLOCK(sc);
 1728         lagg_linkstate(sc);
 1729         lagg_proto_linkstate(sc, lp);
 1730         LAGG_XUNLOCK(sc);
 1731 }
 1732 
 1733 struct lagg_port *
 1734 lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp)
 1735 {
 1736         struct lagg_port *lp_next, *rval = NULL;
 1737 
 1738         /*
 1739          * Search for a port that reports an active link state.
 1740          */
 1741 
 1742         if (lp == NULL)
 1743                 goto search;
 1744         if (LAGG_PORTACTIVE(lp)) {
 1745                 rval = lp;
 1746                 goto found;
 1747         }
 1748         if ((lp_next = SLIST_NEXT(lp, lp_entries)) != NULL &&
 1749             LAGG_PORTACTIVE(lp_next)) {
 1750                 rval = lp_next;
 1751                 goto found;
 1752         }
 1753 
 1754 search:
 1755         SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
 1756                 if (LAGG_PORTACTIVE(lp_next)) {
 1757                         rval = lp_next;
 1758                         goto found;
 1759                 }
 1760         }
 1761 
 1762 found:
 1763         return (rval);
 1764 }
 1765 
 1766 int
 1767 lagg_enqueue(struct ifnet *ifp, struct mbuf *m)
 1768 {
 1769 
 1770         return (ifp->if_transmit)(ifp, m);
 1771 }
 1772 
 1773 /*
 1774  * Simple round robin aggregation
 1775  */
 1776 static void
 1777 lagg_rr_attach(struct lagg_softc *sc)
 1778 {
 1779         sc->sc_seq = 0;
 1780         sc->sc_bkt_count = sc->sc_bkt;
 1781 }
 1782 
 1783 static int
 1784 lagg_rr_start(struct lagg_softc *sc, struct mbuf *m)
 1785 {
 1786         struct lagg_port *lp;
 1787         uint32_t p;
 1788 
 1789         if (sc->sc_bkt_count == 0 && sc->sc_bkt > 0)
 1790                 sc->sc_bkt_count = sc->sc_bkt;
 1791 
 1792         if (sc->sc_bkt > 0) {
 1793                 atomic_subtract_int(&sc->sc_bkt_count, 1);
 1794                 if (atomic_cmpset_int(&sc->sc_bkt_count, 0, sc->sc_bkt))
 1795                         p = atomic_fetchadd_32(&sc->sc_seq, 1);
 1796                 else
 1797                         p = sc->sc_seq;
 1798         } else
 1799                 p = atomic_fetchadd_32(&sc->sc_seq, 1);
 1800 
 1801         p %= sc->sc_count;
 1802         lp = SLIST_FIRST(&sc->sc_ports);
 1803 
 1804         while (p--)
 1805                 lp = SLIST_NEXT(lp, lp_entries);
 1806 
 1807         /*
 1808          * Check the port's link state; lagg_link_active() returns
 1809          * the next active port if this link is down or lp is NULL.
 1810          */
 1811         if ((lp = lagg_link_active(sc, lp)) == NULL) {
 1812                 m_freem(m);
 1813                 return (ENETDOWN);
 1814         }
 1815 
 1816         /* Send mbuf */
 1817         return (lagg_enqueue(lp->lp_ifp, m));
 1818 }
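/*
 * A worked example of the selection above: with two ports and
 * sc_bkt == 3, sc_seq only advances once every third packet, so
 * consecutive packets map to port index p % sc_count as
 *
 *	packet: 1 2 3 4 5 6 7 8 9 ...
 *	port:   0 0 0 1 1 1 0 0 0 ...
 *
 * With sc_bkt == 0 the bucket logic is bypassed, sc_seq advances on
 * every packet, and the pattern is the plain 0 1 0 1 ... round robin.
 */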
 1819 
 1820 static struct mbuf *
 1821 lagg_rr_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
 1822 {
 1823         struct ifnet *ifp = sc->sc_ifp;
 1824 
 1825         /* Just pass in the packet to our lagg device */
 1826         m->m_pkthdr.rcvif = ifp;
 1827 
 1828         return (m);
 1829 }
 1830 
 1831 /*
 1832  * Broadcast mode
 1833  */
 1834 static int
 1835 lagg_bcast_start(struct lagg_softc *sc, struct mbuf *m)
 1836 {
 1837         int active_ports = 0;
 1838         int errors = 0;
 1839         int ret;
 1840         struct lagg_port *lp, *last = NULL;
 1841         struct mbuf *m0;
 1842 
 1843         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1844                 if (!LAGG_PORTACTIVE(lp))
 1845                         continue;
 1846 
 1847                 active_ports++;
 1848 
 1849                 if (last != NULL) {
 1850                         m0 = m_copym(m, 0, M_COPYALL, M_NOWAIT);
 1851                         if (m0 == NULL) {
 1852                                 ret = ENOBUFS;
 1853                                 errors++;
 1854                                 break;
 1855                         }
 1856 
 1857                         ret = lagg_enqueue(last->lp_ifp, m0);
 1858                         if (ret != 0)
 1859                                 errors++;
 1860                 }
 1861                 last = lp;
 1862         }
 1863         if (last == NULL) {
 1864                 m_freem(m);
 1865                 return (ENOENT);
 1866         }
 1867         if ((last = lagg_link_active(sc, last)) == NULL) {
 1868                 m_freem(m);
 1869                 return (ENETDOWN);
 1870         }
 1871 
 1872         ret = lagg_enqueue(last->lp_ifp, m);
 1873         if (ret != 0)
 1874                 errors++;
 1875 
 1876         if (errors == 0)
 1877                 return (ret);
 1878 
 1879         return (0);
 1880 }
 1881 
 1882 static struct mbuf *
 1883 lagg_bcast_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
 1884 {
 1885         struct ifnet *ifp = sc->sc_ifp;
 1886 
 1887         /* Just pass in the packet to our lagg device */
 1888         m->m_pkthdr.rcvif = ifp;
 1889         return (m);
 1890 }
 1891 
 1892 /*
 1893  * Active failover
 1894  */
 1895 static int
 1896 lagg_fail_start(struct lagg_softc *sc, struct mbuf *m)
 1897 {
 1898         struct lagg_port *lp;
 1899 
 1900         /* Use the master port if active or the next available port */
 1901         if ((lp = lagg_link_active(sc, sc->sc_primary)) == NULL) {
 1902                 m_freem(m);
 1903                 return (ENETDOWN);
 1904         }
 1905 
 1906         /* Send mbuf */
 1907         return (lagg_enqueue(lp->lp_ifp, m));
 1908 }
 1909 
 1910 static struct mbuf *
 1911 lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
 1912 {
 1913         struct ifnet *ifp = sc->sc_ifp;
 1914         struct lagg_port *tmp_tp;
 1915 
 1916         if (lp == sc->sc_primary || V_lagg_failover_rx_all) {
 1917                 m->m_pkthdr.rcvif = ifp;
 1918                 return (m);
 1919         }
 1920 
 1921         if (!LAGG_PORTACTIVE(sc->sc_primary)) {
 1922                 tmp_tp = lagg_link_active(sc, sc->sc_primary);
 1923                 /*
 1924                  * If tmp_tp is NULL, we've received a packet when
 1925                  * all our links are down.  Weird, but process it anyway.
 1926                  */
 1927                 if (tmp_tp == NULL || tmp_tp == lp) {
 1928                         m->m_pkthdr.rcvif = ifp;
 1929                         return (m);
 1930                 }
 1931         }
 1932 
 1933         m_freem(m);
 1934         return (NULL);
 1935 }
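/*
 * A summary of the failover receive filter above, assuming the
 * V_lagg_failover_rx_all sysctl (net.link.lagg.failover_rx_all) is
 * left at its default of 0:
 *
 *	frame arrives on the primary port         -> accept
 *	primary down, frame on the active backup  -> accept
 *	all links down                            -> accept (odd, but
 *						      deliberate, see above)
 *	any other port                            -> drop
 *
 * With failover_rx_all nonzero, frames are accepted on every port.
 */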
 1936 
 1937 /*
 1938  * Loadbalancing
 1939  */
 1940 static void
 1941 lagg_lb_attach(struct lagg_softc *sc)
 1942 {
 1943         struct lagg_port *lp;
 1944         struct lagg_lb *lb;
 1945 
 1946         lb = malloc(sizeof(struct lagg_lb), M_DEVBUF, M_WAITOK | M_ZERO);
 1947         lb->lb_key = m_ether_tcpip_hash_init();
 1948         sc->sc_psc = lb;
 1949 
 1950         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 1951                 lagg_lb_port_create(lp);
 1952 }
 1953 
 1954 static void
 1955 lagg_lb_detach(struct lagg_softc *sc)
 1956 {
 1957         struct lagg_lb *lb;
 1958 
 1959         lb = (struct lagg_lb *)sc->sc_psc;
 1960         LAGG_WUNLOCK(sc);
 1961         if (lb != NULL)
 1962                 free(lb, M_DEVBUF);
 1963 }
 1964 
 1965 static int
 1966 lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp)
 1967 {
 1968         struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
 1969         struct lagg_port *lp_next;
 1970         int i = 0;
 1971 
 1972         bzero(&lb->lb_ports, sizeof(lb->lb_ports));
 1973         SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
 1974                 if (lp_next == lp)
 1975                         continue;
 1976                 if (i >= LAGG_MAX_PORTS)
 1977                         return (EINVAL);
 1978                 if (sc->sc_ifflags & IFF_DEBUG)
 1979                         printf("%s: port %s at index %d\n",
 1980                             sc->sc_ifname, lp_next->lp_ifp->if_xname, i);
 1981                 lb->lb_ports[i++] = lp_next;
 1982         }
 1983 
 1984         return (0);
 1985 }
 1986 
 1987 static int
 1988 lagg_lb_port_create(struct lagg_port *lp)
 1989 {
 1990         struct lagg_softc *sc = lp->lp_softc;
 1991         return (lagg_lb_porttable(sc, NULL));
 1992 }
 1993 
 1994 static void
 1995 lagg_lb_port_destroy(struct lagg_port *lp)
 1996 {
 1997         struct lagg_softc *sc = lp->lp_softc;
 1998         lagg_lb_porttable(sc, lp);
 1999 }
 2000 
 2001 static int
 2002 lagg_lb_start(struct lagg_softc *sc, struct mbuf *m)
 2003 {
 2004         struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
 2005         struct lagg_port *lp = NULL;
 2006         uint32_t p = 0;
 2007 
 2008         if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) &&
 2009             M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
 2010                 p = m->m_pkthdr.flowid >> sc->flowid_shift;
 2011         else
 2012                 p = m_ether_tcpip_hash(sc->sc_flags, m, lb->lb_key);
 2013         p %= sc->sc_count;
 2014         lp = lb->lb_ports[p];
 2015 
 2016         /*
 2017          * Check the port's link state; lagg_link_active() returns
 2018          * the next active port if this link is down or lp is NULL.
 2019          */
 2020         if ((lp = lagg_link_active(sc, lp)) == NULL) {
 2021                 m_freem(m);
 2022                 return (ENETDOWN);
 2023         }
 2024 
 2025         /* Send mbuf */
 2026         return (lagg_enqueue(lp->lp_ifp, m));
 2027 }
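/*
 * A worked example of the selection above: with three ports
 * (sc_count == 3), LAGG_OPT_USE_FLOWID set, flowid_shift == 0, and a
 * packet whose m_pkthdr.flowid is 42,
 *
 *	p = 42 >> 0 = 42;	42 % 3 = 0	-> lb_ports[0]
 *
 * All packets of a flow carry the same flowid, so the flow sticks to
 * one port and per-flow ordering is preserved.  Without a usable
 * flowid, m_ether_tcpip_hash() derives an equivalent per-flow value
 * from the Ethernet/IP/TCP headers, seeded with lb_key.
 */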
 2028 
 2029 static struct mbuf *
 2030 lagg_lb_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
 2031 {
 2032         struct ifnet *ifp = sc->sc_ifp;
 2033 
 2034         /* Just pass in the packet to our lagg device */
 2035         m->m_pkthdr.rcvif = ifp;
 2036 
 2037         return (m);
 2038 }
 2039 
 2040 /*
 2041  * 802.3ad LACP
 2042  */
 2043 static void
 2044 lagg_lacp_attach(struct lagg_softc *sc)
 2045 {
 2046         struct lagg_port *lp;
 2047 
 2048         lacp_attach(sc);
 2049         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 2050                 lacp_port_create(lp);
 2051 }
 2052 
 2053 static void
 2054 lagg_lacp_detach(struct lagg_softc *sc)
 2055 {
 2056         struct lagg_port *lp;
 2057         void *psc;
 2058 
 2059         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 2060                 lacp_port_destroy(lp);
 2061 
 2062         psc = sc->sc_psc;
 2063         sc->sc_psc = NULL;
 2064         LAGG_WUNLOCK(sc);
 2065 
 2066         lacp_detach(psc);
 2067 }
 2068 
 2069 static void
 2070 lagg_lacp_lladdr(struct lagg_softc *sc)
 2071 {
 2072         struct lagg_port *lp;
 2073 
 2074         LAGG_SXLOCK_ASSERT(sc);
 2075 
 2076         /* purge all the lacp ports */
 2077         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 2078                 lacp_port_destroy(lp);
 2079 
 2080         /* add them back in */
 2081         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 2082                 lacp_port_create(lp);
 2083 }
 2084 
 2085 static int
 2086 lagg_lacp_start(struct lagg_softc *sc, struct mbuf *m)
 2087 {
 2088         struct lagg_port *lp;
 2089 
 2090         lp = lacp_select_tx_port(sc, m);
 2091         if (lp == NULL) {
 2092                 m_freem(m);
 2093                 return (ENETDOWN);
 2094         }
 2095 
 2096         /* Send mbuf */
 2097         return (lagg_enqueue(lp->lp_ifp, m));
 2098 }
 2099 
 2100 static struct mbuf *
 2101 lagg_lacp_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
 2102 {
 2103         struct ifnet *ifp = sc->sc_ifp;
 2104         struct ether_header *eh;
 2105         u_short etype;
 2106 
 2107         eh = mtod(m, struct ether_header *);
 2108         etype = ntohs(eh->ether_type);
 2109 
 2110         /* Tap off LACP control messages */
 2111         if ((m->m_flags & M_VLANTAG) == 0 && etype == ETHERTYPE_SLOW) {
 2112                 m = lacp_input(lp, m);
 2113                 if (m == NULL)
 2114                         return (NULL);
 2115         }
 2116 
 2117         /*
 2118          * If the port is not collecting or not in the active aggregator then
 2119          * free and return.
 2120          */
 2121         if (lacp_iscollecting(lp) == 0 || lacp_isactive(lp) == 0) {
 2122                 m_freem(m);
 2123                 return (NULL);
 2124         }
 2125 
 2126         m->m_pkthdr.rcvif = ifp;
 2127         return (m);
 2128 }
 2129 
