FreeBSD/Linux Kernel Cross Reference
sys/net/if_lagg.c


    1 /*      $OpenBSD: if_trunk.c,v 1.30 2007/01/31 06:20:19 reyk Exp $      */
    2 
    3 /*
    4  * Copyright (c) 2005, 2006 Reyk Floeter <reyk@openbsd.org>
    5  * Copyright (c) 2007 Andrew Thompson <thompsa@FreeBSD.org>
    6  * Copyright (c) 2014, 2016 Marcelo Araujo <araujo@FreeBSD.org>
    7  *
    8  * Permission to use, copy, modify, and distribute this software for any
    9  * purpose with or without fee is hereby granted, provided that the above
   10  * copyright notice and this permission notice appear in all copies.
   11  *
   12  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
   13  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
   14  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
   15  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
   16  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
   17  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
   18  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
   19  */
   20 
   21 #include <sys/cdefs.h>
   22 __FBSDID("$FreeBSD: releng/11.0/sys/net/if_lagg.c 302054 2016-06-21 13:48:49Z bz $");
   23 
   24 #include "opt_inet.h"
   25 #include "opt_inet6.h"
   26 
   27 #include <sys/param.h>
   28 #include <sys/kernel.h>
   29 #include <sys/malloc.h>
   30 #include <sys/mbuf.h>
   31 #include <sys/queue.h>
   32 #include <sys/socket.h>
   33 #include <sys/sockio.h>
   34 #include <sys/sysctl.h>
   35 #include <sys/module.h>
   36 #include <sys/priv.h>
   37 #include <sys/systm.h>
   38 #include <sys/proc.h>
   39 #include <sys/lock.h>
   40 #include <sys/rmlock.h>
   41 #include <sys/taskqueue.h>
   42 #include <sys/eventhandler.h>
   43 
   44 #include <net/ethernet.h>
   45 #include <net/if.h>
   46 #include <net/if_clone.h>
   47 #include <net/if_arp.h>
   48 #include <net/if_dl.h>
   49 #include <net/if_media.h>
   50 #include <net/if_types.h>
   51 #include <net/if_var.h>
   52 #include <net/bpf.h>
   53 #include <net/vnet.h>
   54 
   55 #if defined(INET) || defined(INET6)
   56 #include <netinet/in.h>
   57 #include <netinet/ip.h>
   58 #endif
   59 #ifdef INET
   60 #include <netinet/in_systm.h>
   61 #include <netinet/if_ether.h>
   62 #endif
   63 
   64 #ifdef INET6
   65 #include <netinet/ip6.h>
   66 #include <netinet6/in6_var.h>
   67 #include <netinet6/in6_ifattach.h>
   68 #endif
   69 
   70 #include <net/if_vlan_var.h>
   71 #include <net/if_lagg.h>
   72 #include <net/ieee8023ad_lacp.h>
   73 
   74 /* Special flags we should propagate to the lagg ports. */
   75 static struct {
   76         int flag;
   77         int (*func)(struct ifnet *, int);
   78 } lagg_pflags[] = {
   79         {IFF_PROMISC, ifpromisc},
   80         {IFF_ALLMULTI, if_allmulti},
   81         {0, NULL}
   82 };
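
/*
 * A brief note on how this table is consumed (a sketch based on the
 * helpers declared below): lagg_setflags() walks lagg_pflags[] and,
 * for each flag whose state differs between the lagg interface and a
 * port, invokes the stored handler (ifpromisc() or if_allmulti()) to
 * mirror the change onto that port.
 */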
   83 
   84 VNET_DEFINE(SLIST_HEAD(__trhead, lagg_softc), lagg_list); /* list of laggs */
   85 #define V_lagg_list     VNET(lagg_list)
   86 static VNET_DEFINE(struct mtx, lagg_list_mtx);
   87 #define V_lagg_list_mtx VNET(lagg_list_mtx)
   88 #define LAGG_LIST_LOCK_INIT(x)          mtx_init(&V_lagg_list_mtx, \
   89                                         "if_lagg list", NULL, MTX_DEF)
   90 #define LAGG_LIST_LOCK_DESTROY(x)       mtx_destroy(&V_lagg_list_mtx)
   91 #define LAGG_LIST_LOCK(x)               mtx_lock(&V_lagg_list_mtx)
   92 #define LAGG_LIST_UNLOCK(x)             mtx_unlock(&V_lagg_list_mtx)
   93 eventhandler_tag        lagg_detach_cookie = NULL;
   94 
   95 static int      lagg_clone_create(struct if_clone *, int, caddr_t);
   96 static void     lagg_clone_destroy(struct ifnet *);
   97 static VNET_DEFINE(struct if_clone *, lagg_cloner);
   98 #define V_lagg_cloner   VNET(lagg_cloner)
   99 static const char laggname[] = "lagg";
  100 
  101 static void     lagg_lladdr(struct lagg_softc *, uint8_t *);
  102 static void     lagg_capabilities(struct lagg_softc *);
  103 static void     lagg_port_lladdr(struct lagg_port *, uint8_t *, lagg_llqtype);
  104 static void     lagg_port_setlladdr(void *, int);
  105 static int      lagg_port_create(struct lagg_softc *, struct ifnet *);
  106 static int      lagg_port_destroy(struct lagg_port *, int);
  107 static struct mbuf *lagg_input(struct ifnet *, struct mbuf *);
  108 static void     lagg_linkstate(struct lagg_softc *);
  109 static void     lagg_port_state(struct ifnet *, int);
  110 static int      lagg_port_ioctl(struct ifnet *, u_long, caddr_t);
  111 static int      lagg_port_output(struct ifnet *, struct mbuf *,
  112                     const struct sockaddr *, struct route *);
  113 static void     lagg_port_ifdetach(void *arg __unused, struct ifnet *);
  114 #ifdef LAGG_PORT_STACKING
  115 static int      lagg_port_checkstacking(struct lagg_softc *);
  116 #endif
  117 static void     lagg_port2req(struct lagg_port *, struct lagg_reqport *);
  118 static void     lagg_init(void *);
  119 static void     lagg_stop(struct lagg_softc *);
  120 static int      lagg_ioctl(struct ifnet *, u_long, caddr_t);
  121 static int      lagg_ether_setmulti(struct lagg_softc *);
  122 static int      lagg_ether_cmdmulti(struct lagg_port *, int);
  123 static  int     lagg_setflag(struct lagg_port *, int, int,
  124                     int (*func)(struct ifnet *, int));
  125 static  int     lagg_setflags(struct lagg_port *, int status);
  126 static uint64_t lagg_get_counter(struct ifnet *ifp, ift_counter cnt);
  127 static int      lagg_transmit(struct ifnet *, struct mbuf *);
  128 static void     lagg_qflush(struct ifnet *);
  129 static int      lagg_media_change(struct ifnet *);
  130 static void     lagg_media_status(struct ifnet *, struct ifmediareq *);
  131 static struct lagg_port *lagg_link_active(struct lagg_softc *,
  132             struct lagg_port *);
  133 
  134 /* Simple round robin */
  135 static void     lagg_rr_attach(struct lagg_softc *);
  136 static int      lagg_rr_start(struct lagg_softc *, struct mbuf *);
  137 static struct mbuf *lagg_rr_input(struct lagg_softc *, struct lagg_port *,
  138                     struct mbuf *);
  139 
  140 /* Active failover */
  141 static int      lagg_fail_start(struct lagg_softc *, struct mbuf *);
  142 static struct mbuf *lagg_fail_input(struct lagg_softc *, struct lagg_port *,
  143                     struct mbuf *);
  144 
  145 /* Loadbalancing */
  146 static void     lagg_lb_attach(struct lagg_softc *);
  147 static void     lagg_lb_detach(struct lagg_softc *);
  148 static int      lagg_lb_port_create(struct lagg_port *);
  149 static void     lagg_lb_port_destroy(struct lagg_port *);
  150 static int      lagg_lb_start(struct lagg_softc *, struct mbuf *);
  151 static struct mbuf *lagg_lb_input(struct lagg_softc *, struct lagg_port *,
  152                     struct mbuf *);
  153 static int      lagg_lb_porttable(struct lagg_softc *, struct lagg_port *);
  154 
  155 /* Broadcast */
  156 static int    lagg_bcast_start(struct lagg_softc *, struct mbuf *);
  157 static struct mbuf *lagg_bcast_input(struct lagg_softc *, struct lagg_port *,
  158                     struct mbuf *);
  159 
  160 /* 802.3ad LACP */
  161 static void     lagg_lacp_attach(struct lagg_softc *);
  162 static void     lagg_lacp_detach(struct lagg_softc *);
  163 static int      lagg_lacp_start(struct lagg_softc *, struct mbuf *);
  164 static struct mbuf *lagg_lacp_input(struct lagg_softc *, struct lagg_port *,
  165                     struct mbuf *);
  166 static void     lagg_lacp_lladdr(struct lagg_softc *);
  167 
  168 /* lagg protocol table */
  169 static const struct lagg_proto {
  170         lagg_proto      pr_num;
  171         void            (*pr_attach)(struct lagg_softc *);
  172         void            (*pr_detach)(struct lagg_softc *);
  173         int             (*pr_start)(struct lagg_softc *, struct mbuf *);
  174         struct mbuf *   (*pr_input)(struct lagg_softc *, struct lagg_port *,
  175                             struct mbuf *);
  176         int             (*pr_addport)(struct lagg_port *);
  177         void            (*pr_delport)(struct lagg_port *);
  178         void            (*pr_linkstate)(struct lagg_port *);
  179         void            (*pr_init)(struct lagg_softc *);
  180         void            (*pr_stop)(struct lagg_softc *);
  181         void            (*pr_lladdr)(struct lagg_softc *);
  182         void            (*pr_request)(struct lagg_softc *, void *);
  183         void            (*pr_portreq)(struct lagg_port *, void *);
  184 } lagg_protos[] = {
  185     {
  186         .pr_num = LAGG_PROTO_NONE
  187     },
  188     {
  189         .pr_num = LAGG_PROTO_ROUNDROBIN,
  190         .pr_attach = lagg_rr_attach,
  191         .pr_start = lagg_rr_start,
  192         .pr_input = lagg_rr_input,
  193     },
  194     {
  195         .pr_num = LAGG_PROTO_FAILOVER,
  196         .pr_start = lagg_fail_start,
  197         .pr_input = lagg_fail_input,
  198     },
  199     {
  200         .pr_num = LAGG_PROTO_LOADBALANCE,
  201         .pr_attach = lagg_lb_attach,
  202         .pr_detach = lagg_lb_detach,
  203         .pr_start = lagg_lb_start,
  204         .pr_input = lagg_lb_input,
  205         .pr_addport = lagg_lb_port_create,
  206         .pr_delport = lagg_lb_port_destroy,
  207     },
  208     {
  209         .pr_num = LAGG_PROTO_LACP,
  210         .pr_attach = lagg_lacp_attach,
  211         .pr_detach = lagg_lacp_detach,
  212         .pr_start = lagg_lacp_start,
  213         .pr_input = lagg_lacp_input,
  214         .pr_addport = lacp_port_create,
  215         .pr_delport = lacp_port_destroy,
  216         .pr_linkstate = lacp_linkstate,
  217         .pr_init = lacp_init,
  218         .pr_stop = lacp_stop,
  219         .pr_lladdr = lagg_lacp_lladdr,
  220         .pr_request = lacp_req,
  221         .pr_portreq = lacp_portreq,
  222     },
  223     {
  224         .pr_num = LAGG_PROTO_BROADCAST,
  225         .pr_start = lagg_bcast_start,
  226         .pr_input = lagg_bcast_input,
  227     },
  228 };
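
/*
 * The table above is indexed directly by the lagg_proto value kept in
 * sc->sc_proto, so each row's pr_num is expected to match its position.
 * Every lagg_proto_*() wrapper below follows the same NULL-checked
 * dispatch pattern, e.g.:
 *
 *	if (lagg_protos[sc->sc_proto].pr_linkstate != NULL)
 *		lagg_protos[sc->sc_proto].pr_linkstate(lp);
 */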
  229 
  230 SYSCTL_DECL(_net_link);
  231 SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0,
  232     "Link Aggregation");
  233 
  234 /* Allow input on any failover links */
  235 static VNET_DEFINE(int, lagg_failover_rx_all);
  236 #define V_lagg_failover_rx_all  VNET(lagg_failover_rx_all)
  237 SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW | CTLFLAG_VNET,
  238     &VNET_NAME(lagg_failover_rx_all), 0,
  239     "Accept input from any interface in a failover lagg");
  240 
  241 /* Default value for using flowid */
  242 static VNET_DEFINE(int, def_use_flowid) = 1;
  243 #define V_def_use_flowid        VNET(def_use_flowid)
  244 SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RWTUN,
  245     &VNET_NAME(def_use_flowid), 0,
  246     "Default setting for using flow id for load sharing");
  247 
  248 /* Default value for flowid shift */
  249 static VNET_DEFINE(int, def_flowid_shift) = 16;
  250 #define V_def_flowid_shift      VNET(def_flowid_shift)
  251 SYSCTL_INT(_net_link_lagg, OID_AUTO, default_flowid_shift, CTLFLAG_RWTUN,
  252     &VNET_NAME(def_flowid_shift), 0,
  253     "Default setting for flowid shift for load sharing");
  254 
  255 static void
  256 vnet_lagg_init(const void *unused __unused)
  257 {
  258 
  259         LAGG_LIST_LOCK_INIT();
  260         SLIST_INIT(&V_lagg_list);
  261         V_lagg_cloner = if_clone_simple(laggname, lagg_clone_create,
  262             lagg_clone_destroy, 0);
  263 }
  264 VNET_SYSINIT(vnet_lagg_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
  265     vnet_lagg_init, NULL);
  266 
  267 static void
  268 vnet_lagg_uninit(const void *unused __unused)
  269 {
  270 
  271         if_clone_detach(V_lagg_cloner);
  272         LAGG_LIST_LOCK_DESTROY();
  273 }
  274 VNET_SYSUNINIT(vnet_lagg_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY,
  275     vnet_lagg_uninit, NULL);
  276 
  277 static int
  278 lagg_modevent(module_t mod, int type, void *data)
  279 {
  280 
  281         switch (type) {
  282         case MOD_LOAD:
  283                 lagg_input_p = lagg_input;
  284                 lagg_linkstate_p = lagg_port_state;
  285                 lagg_detach_cookie = EVENTHANDLER_REGISTER(
  286                     ifnet_departure_event, lagg_port_ifdetach, NULL,
  287                     EVENTHANDLER_PRI_ANY);
  288                 break;
  289         case MOD_UNLOAD:
  290                 EVENTHANDLER_DEREGISTER(ifnet_departure_event,
  291                     lagg_detach_cookie);
  292                 lagg_input_p = NULL;
  293                 lagg_linkstate_p = NULL;
  294                 break;
  295         default:
  296                 return (EOPNOTSUPP);
  297         }
  298         return (0);
  299 }
  300 
  301 static moduledata_t lagg_mod = {
  302         "if_lagg",
  303         lagg_modevent,
  304         0
  305 };
  306 
  307 DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
  308 MODULE_VERSION(if_lagg, 1);
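
/*
 * The driver may be loaded at runtime (e.g. `kldload if_lagg`) or
 * compiled in with `device lagg` in the kernel configuration file.
 * On MOD_LOAD, lagg_modevent() above installs the input and
 * link-state hooks (lagg_input_p, lagg_linkstate_p) and registers
 * for interface departure events; MOD_UNLOAD reverses this.
 */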
  309 
  310 static void
  311 lagg_proto_attach(struct lagg_softc *sc, lagg_proto pr)
  312 {
  313 
  314         KASSERT(sc->sc_proto == LAGG_PROTO_NONE, ("%s: sc %p has proto",
  315             __func__, sc));
  316 
  317         if (sc->sc_ifflags & IFF_DEBUG)
  318                 if_printf(sc->sc_ifp, "using proto %u\n", pr);
  319 
  320         if (lagg_protos[pr].pr_attach != NULL)
  321                 lagg_protos[pr].pr_attach(sc);
  322         sc->sc_proto = pr;
  323 }
  324 
  325 static void
  326 lagg_proto_detach(struct lagg_softc *sc)
  327 {
  328         lagg_proto pr;
  329 
  330         LAGG_WLOCK_ASSERT(sc);
  331 
  332         pr = sc->sc_proto;
  333         sc->sc_proto = LAGG_PROTO_NONE;
  334 
  335         if (lagg_protos[pr].pr_detach != NULL)
  336                 lagg_protos[pr].pr_detach(sc);
  337         else
  338                 LAGG_WUNLOCK(sc);
  339 }
  340 
  341 static int
  342 lagg_proto_start(struct lagg_softc *sc, struct mbuf *m)
  343 {
  344 
  345         return (lagg_protos[sc->sc_proto].pr_start(sc, m));
  346 }
  347 
  348 static struct mbuf *
  349 lagg_proto_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
  350 {
  351 
  352         return (lagg_protos[sc->sc_proto].pr_input(sc, lp, m));
  353 }
  354 
  355 static int
  356 lagg_proto_addport(struct lagg_softc *sc, struct lagg_port *lp)
  357 {
  358 
  359         if (lagg_protos[sc->sc_proto].pr_addport == NULL)
  360                 return (0);
  361         else
  362                 return (lagg_protos[sc->sc_proto].pr_addport(lp));
  363 }
  364 
  365 static void
  366 lagg_proto_delport(struct lagg_softc *sc, struct lagg_port *lp)
  367 {
  368 
  369         if (lagg_protos[sc->sc_proto].pr_delport != NULL)
  370                 lagg_protos[sc->sc_proto].pr_delport(lp);
  371 }
  372 
  373 static void
  374 lagg_proto_linkstate(struct lagg_softc *sc, struct lagg_port *lp)
  375 {
  376 
  377         if (lagg_protos[sc->sc_proto].pr_linkstate != NULL)
  378                 lagg_protos[sc->sc_proto].pr_linkstate(lp);
  379 }
  380 
  381 static void
  382 lagg_proto_init(struct lagg_softc *sc)
  383 {
  384 
  385         if (lagg_protos[sc->sc_proto].pr_init != NULL)
  386                 lagg_protos[sc->sc_proto].pr_init(sc);
  387 }
  388 
  389 static void
  390 lagg_proto_stop(struct lagg_softc *sc)
  391 {
  392 
  393         if (lagg_protos[sc->sc_proto].pr_stop != NULL)
  394                 lagg_protos[sc->sc_proto].pr_stop(sc);
  395 }
  396 
  397 static void
  398 lagg_proto_lladdr(struct lagg_softc *sc)
  399 {
  400 
  401         if (lagg_protos[sc->sc_proto].pr_lladdr != NULL)
  402                 lagg_protos[sc->sc_proto].pr_lladdr(sc);
  403 }
  404 
  405 static void
  406 lagg_proto_request(struct lagg_softc *sc, void *v)
  407 {
  408 
  409         if (lagg_protos[sc->sc_proto].pr_request != NULL)
  410                 lagg_protos[sc->sc_proto].pr_request(sc, v);
  411 }
  412 
  413 static void
  414 lagg_proto_portreq(struct lagg_softc *sc, struct lagg_port *lp, void *v)
  415 {
  416 
  417         if (lagg_protos[sc->sc_proto].pr_portreq != NULL)
  418                 lagg_protos[sc->sc_proto].pr_portreq(lp, v);
  419 }
  420 
  421 /*
  422  * This routine is run via a vlan
  423  * config EVENT.
  424  */
  425 static void
  426 lagg_register_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
  427 {
  428         struct lagg_softc *sc = ifp->if_softc;
  429         struct lagg_port *lp;
  430         struct rm_priotracker tracker;
  431 
  432         if (ifp->if_softc != arg)    /* Not our event */
  433                 return;
  434 
  435         LAGG_RLOCK(sc, &tracker);
  436         if (!SLIST_EMPTY(&sc->sc_ports)) {
  437                 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
  438                         EVENTHANDLER_INVOKE(vlan_config, lp->lp_ifp, vtag);
  439         }
  440         LAGG_RUNLOCK(sc, &tracker);
  441 }
  442 
  443 /*
  444  * This routine is run via a vlan
  445  * unconfig EVENT.
  446  */
  447 static void
  448 lagg_unregister_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
  449 {
  450         struct lagg_softc *sc = ifp->if_softc;
  451         struct lagg_port *lp;
  452         struct rm_priotracker tracker;
  453 
  454         if (ifp->if_softc != arg)    /* Not our event */
  455                 return;
  456 
  457         LAGG_RLOCK(sc, &tracker);
  458         if (!SLIST_EMPTY(&sc->sc_ports)) {
  459                 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
  460                         EVENTHANDLER_INVOKE(vlan_unconfig, lp->lp_ifp, vtag);
  461         }
  462         LAGG_RUNLOCK(sc, &tracker);
  463 }
  464 
  465 static int
  466 lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
  467 {
  468         struct lagg_softc *sc;
  469         struct ifnet *ifp;
  470         static const u_char eaddr[6];   /* 00:00:00:00:00:00 */
  471 
  472         sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
  473         ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
  474         if (ifp == NULL) {
  475                 free(sc, M_DEVBUF);
  476                 return (ENOSPC);
  477         }
  478 
  479         if (V_def_use_flowid)
  480                 sc->sc_opts |= LAGG_OPT_USE_FLOWID;
  481         sc->flowid_shift = V_def_flowid_shift;
  482 
  483         /* Hash all layers by default */
  484         sc->sc_flags = MBUF_HASHFLAG_L2|MBUF_HASHFLAG_L3|MBUF_HASHFLAG_L4;
  485 
  486         lagg_proto_attach(sc, LAGG_PROTO_DEFAULT);
  487 
  488         LAGG_LOCK_INIT(sc);
  489         SLIST_INIT(&sc->sc_ports);
  490         TASK_INIT(&sc->sc_lladdr_task, 0, lagg_port_setlladdr, sc);
  491 
  492         /* Initialise pseudo media types */
  493         ifmedia_init(&sc->sc_media, 0, lagg_media_change,
  494             lagg_media_status);
  495         ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
  496         ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
  497 
  498         if_initname(ifp, laggname, unit);
  499         ifp->if_softc = sc;
  500         ifp->if_transmit = lagg_transmit;
  501         ifp->if_qflush = lagg_qflush;
  502         ifp->if_init = lagg_init;
  503         ifp->if_ioctl = lagg_ioctl;
  504         ifp->if_get_counter = lagg_get_counter;
  505         ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
  506         ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS;
  507 
  508         /*
  509          * Attach as an ordinary ethernet device; children will be
  510          * attached as the special device type IFT_IEEE8023ADLAG.
  511          */
  512         ether_ifattach(ifp, eaddr);
  513 
  514         sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
  515                 lagg_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
  516         sc->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
  517                 lagg_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
  518 
  519         /* Insert into the global list of laggs */
  520         LAGG_LIST_LOCK();
  521         SLIST_INSERT_HEAD(&V_lagg_list, sc, sc_entries);
  522         LAGG_LIST_UNLOCK();
  523 
  524         return (0);
  525 }
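
/*
 * A typical administrative sequence that exercises the code above and
 * lagg_port_create()/lagg_ioctl() below (a usage sketch; em0/em1 are
 * placeholder ports):
 *
 *	# ifconfig lagg0 create
 *	# ifconfig lagg0 laggproto lacp laggport em0 laggport em1 \
 *	      192.0.2.10/24 up
 */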
  526 
  527 static void
  528 lagg_clone_destroy(struct ifnet *ifp)
  529 {
  530         struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
  531         struct lagg_port *lp;
  532 
  533         LAGG_WLOCK(sc);
  534 
  535         lagg_stop(sc);
  536         ifp->if_flags &= ~IFF_UP;
  537 
  538         EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach);
  539         EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach);
  540 
  541         /* Shutdown and remove lagg ports */
  542         while ((lp = SLIST_FIRST(&sc->sc_ports)) != NULL)
  543                 lagg_port_destroy(lp, 1);
  544         /* Unhook the aggregation protocol */
  545         lagg_proto_detach(sc);
  546         LAGG_UNLOCK_ASSERT(sc);
  547 
  548         ifmedia_removeall(&sc->sc_media);
  549         ether_ifdetach(ifp);
  550         if_free(ifp);
  551 
  552         LAGG_LIST_LOCK();
  553         SLIST_REMOVE(&V_lagg_list, sc, lagg_softc, sc_entries);
  554         LAGG_LIST_UNLOCK();
  555 
  556         taskqueue_drain(taskqueue_swi, &sc->sc_lladdr_task);
  557         LAGG_LOCK_DESTROY(sc);
  558         free(sc, M_DEVBUF);
  559 }
  560 
  561 /*
  562  * Set the link-layer address on the lagg interface itself.
  563  *
  564  * Set noinline to be dtrace-friendly
  565  */
  566 static __noinline void
  567 lagg_lladdr(struct lagg_softc *sc, uint8_t *lladdr)
  568 {
  569         struct ifnet *ifp = sc->sc_ifp;
  570         struct lagg_port lp;
  571 
  572         if (memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)
  573                 return;
  574 
  575         LAGG_WLOCK_ASSERT(sc);
  576         /*
  577          * Set the link layer address on the lagg interface.
  578          * lagg_proto_lladdr() notifies the aggregation protocol
  579          * of the MAC change.  The iflladdr_event handler, which
  580          * may trigger gratuitous ARPs for INET, is run from
  581          * a taskqueue.
  582          */
  583         bcopy(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN);
  584         lagg_proto_lladdr(sc);
  585 
  586         /*
  587          * Send notification request for lagg interface
  588          * itself. Note that new lladdr is already set.
  589          */
  590         bzero(&lp, sizeof(lp));
  591         lp.lp_ifp = sc->sc_ifp;
  592         lp.lp_softc = sc;
  593 
  594         /* Do not request lladdr change */
  595         lagg_port_lladdr(&lp, lladdr, LAGG_LLQTYPE_VIRT);
  596 }
  597 
  598 static void
  599 lagg_capabilities(struct lagg_softc *sc)
  600 {
  601         struct lagg_port *lp;
  602         int cap = ~0, ena = ~0;
  603         u_long hwa = ~0UL;
  604         struct ifnet_hw_tsomax hw_tsomax;
  605 
  606         LAGG_WLOCK_ASSERT(sc);
  607 
  608         memset(&hw_tsomax, 0, sizeof(hw_tsomax));
  609 
  610         /* Get capabilities from the lagg ports */
  611         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
  612                 cap &= lp->lp_ifp->if_capabilities;
  613                 ena &= lp->lp_ifp->if_capenable;
  614                 hwa &= lp->lp_ifp->if_hwassist;
  615                 if_hw_tsomax_common(lp->lp_ifp, &hw_tsomax);
  616         }
  617         cap = (cap == ~0 ? 0 : cap);
  618         ena = (ena == ~0 ? 0 : ena);
  619         hwa = (hwa == ~0 ? 0 : hwa);
  620 
  621         if (sc->sc_ifp->if_capabilities != cap ||
  622             sc->sc_ifp->if_capenable != ena ||
  623             sc->sc_ifp->if_hwassist != hwa ||
  624             if_hw_tsomax_update(sc->sc_ifp, &hw_tsomax) != 0) {
  625                 sc->sc_ifp->if_capabilities = cap;
  626                 sc->sc_ifp->if_capenable = ena;
  627                 sc->sc_ifp->if_hwassist = hwa;
  628                 getmicrotime(&sc->sc_ifp->if_lastchange);
  629 
  630                 if (sc->sc_ifflags & IFF_DEBUG)
  631                         if_printf(sc->sc_ifp,
  632                             "capabilities 0x%08x enabled 0x%08x\n", cap, ena);
  633         }
  634 }
  635 
  636 /*
  637  * Enqueue interface lladdr notification.
  638  * If a request is already queued, it is updated.
  639  * If setting the lladdr is also desired, @llq_type has to be LAGG_LLQTYPE_PHYS.
  640  *
  641  * Set noinline to be dtrace-friendly
  642  */
  643 static __noinline void
  644 lagg_port_lladdr(struct lagg_port *lp, uint8_t *lladdr, lagg_llqtype llq_type)
  645 {
  646         struct lagg_softc *sc = lp->lp_softc;
  647         struct ifnet *ifp = lp->lp_ifp;
  648         struct lagg_llq *llq;
  649 
  650         LAGG_WLOCK_ASSERT(sc);
  651 
  652         /*
  653          * Do not enqueue a request when the lladdr is unchanged for
  654          * "physical" interfaces (i.e. ports in the lagg).
  655          */
  656         if (llq_type == LAGG_LLQTYPE_PHYS &&
  657             memcmp(IF_LLADDR(ifp), lladdr, ETHER_ADDR_LEN) == 0)
  658                 return;
  659 
  660         /* Check to make sure it's not already queued to be changed */
  661         SLIST_FOREACH(llq, &sc->sc_llq_head, llq_entries) {
  662                 if (llq->llq_ifp == ifp) {
  663                         /* Update lladdr, it may have changed */
  664                         bcopy(lladdr, llq->llq_lladdr, ETHER_ADDR_LEN);
  665                         return;
  666                 }
  667         }
  668 
  669         llq = malloc(sizeof(struct lagg_llq), M_DEVBUF, M_NOWAIT | M_ZERO);
  670         if (llq == NULL)        /* XXX what to do */
  671                 return;
  672 
  673         llq->llq_ifp = ifp;
  674         llq->llq_type = llq_type;
  675         bcopy(lladdr, llq->llq_lladdr, ETHER_ADDR_LEN);
  676         /* XXX: We should insert at the tail */
  677         SLIST_INSERT_HEAD(&sc->sc_llq_head, llq, llq_entries);
  678 
  679         taskqueue_enqueue(taskqueue_swi, &sc->sc_lladdr_task);
  680 }
  681 
  682 /*
  683  * Set the interface MAC address from a taskqueue to avoid a LOR.
  684  *
  685  * Set noinline to be dtrace-friendly
  686  */
  687 static __noinline void
  688 lagg_port_setlladdr(void *arg, int pending)
  689 {
  690         struct lagg_softc *sc = (struct lagg_softc *)arg;
  691         struct lagg_llq *llq, *head;
  692         struct ifnet *ifp;
  693 
  694         /* Grab a local reference to the queue and remove it from the softc */
  695         LAGG_WLOCK(sc);
  696         head = SLIST_FIRST(&sc->sc_llq_head);
  697         SLIST_FIRST(&sc->sc_llq_head) = NULL;
  698         LAGG_WUNLOCK(sc);
  699 
  700         /*
  701          * Traverse the queue and set the lladdr on each ifp. It is safe to
  702          * do this unlocked as we have the only reference to it.
  703          */
  704         for (llq = head; llq != NULL; llq = head) {
  705                 ifp = llq->llq_ifp;
  706 
  707                 CURVNET_SET(ifp->if_vnet);
  708 
  709                 /*
  710                  * Set the link layer address on the laggport interface.
  711          * Note that if_setlladdr() or the iflladdr_event handler
  712          * may result in ARP transmission / lltable updates.
  713                  */
  714                 if (llq->llq_type == LAGG_LLQTYPE_PHYS)
  715                         if_setlladdr(ifp, llq->llq_lladdr,
  716                             ETHER_ADDR_LEN);
  717                 else
  718                         EVENTHANDLER_INVOKE(iflladdr_event, ifp);
  719                 CURVNET_RESTORE();
  720                 head = SLIST_NEXT(llq, llq_entries);
  721                 free(llq, M_DEVBUF);
  722         }
  723 }
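
/*
 * Deferring these updates to taskqueue_swi means if_setlladdr() and
 * the iflladdr_event handlers run with no lagg lock held: the queue
 * head is detached under LAGG_WLOCK() and then walked unlocked, which
 * avoids a lock-order reversal against locks those handlers may take.
 */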
  724 
  725 static int
  726 lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
  727 {
  728         struct lagg_softc *sc_ptr;
  729         struct lagg_port *lp, *tlp;
  730         int error, i;
  731         uint64_t *pval;
  732 
  733         LAGG_WLOCK_ASSERT(sc);
  734 
  735         /* Limit the maximum number of lagg ports */
  736         if (sc->sc_count >= LAGG_MAX_PORTS)
  737                 return (ENOSPC);
  738 
  739         /* Check if the port has already been associated with a lagg */
  740         if (ifp->if_lagg != NULL) {
  741                 /* Port is already in the current lagg? */
  742                 lp = (struct lagg_port *)ifp->if_lagg;
  743                 if (lp->lp_softc == sc)
  744                         return (EEXIST);
  745                 return (EBUSY);
  746         }
  747 
  748         /* XXX Disallow non-ethernet interfaces (this should be any of the 802 types) */
  749         if (ifp->if_type != IFT_ETHER && ifp->if_type != IFT_L2VLAN)
  750                 return (EPROTONOSUPPORT);
  751 
  752         /* Allow the first Ethernet member to define the MTU */
  753         if (SLIST_EMPTY(&sc->sc_ports))
  754                 sc->sc_ifp->if_mtu = ifp->if_mtu;
  755         else if (sc->sc_ifp->if_mtu != ifp->if_mtu) {
  756                 if_printf(sc->sc_ifp, "invalid MTU for %s\n",
  757                     ifp->if_xname);
  758                 return (EINVAL);
  759         }
  760 
  761         if ((lp = malloc(sizeof(struct lagg_port),
  762             M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
  763                 return (ENOMEM);
  764 
  765         /* Check if port is a stacked lagg */
  766         LAGG_LIST_LOCK();
  767         SLIST_FOREACH(sc_ptr, &V_lagg_list, sc_entries) {
  768                 if (ifp == sc_ptr->sc_ifp) {
  769                         LAGG_LIST_UNLOCK();
  770                         free(lp, M_DEVBUF);
  771                         return (EINVAL);
  772                         /* XXX disable stacking for the moment, it's untested */
  773 #ifdef LAGG_PORT_STACKING
  774                         lp->lp_flags |= LAGG_PORT_STACK;
  775                         if (lagg_port_checkstacking(sc_ptr) >=
  776                             LAGG_MAX_STACKING) {
  777                                 LAGG_LIST_UNLOCK();
  778                                 free(lp, M_DEVBUF);
  779                                 return (E2BIG);
  780                         }
  781 #endif
  782                 }
  783         }
  784         LAGG_LIST_UNLOCK();
  785 
  786         /* Change the interface type */
  787         lp->lp_iftype = ifp->if_type;
  788         ifp->if_type = IFT_IEEE8023ADLAG;
  789         ifp->if_lagg = lp;
  790         lp->lp_ioctl = ifp->if_ioctl;
  791         ifp->if_ioctl = lagg_port_ioctl;
  792         lp->lp_output = ifp->if_output;
  793         ifp->if_output = lagg_port_output;
  794 
  795         lp->lp_ifp = ifp;
  796         lp->lp_softc = sc;
  797 
  798         /* Save port link layer address */
  799         bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ETHER_ADDR_LEN);
  800 
  801         if (SLIST_EMPTY(&sc->sc_ports)) {
  802                 sc->sc_primary = lp;
  803                 /* First port in the lagg. Update/notify the lagg lladdr */
  804                 lagg_lladdr(sc, IF_LLADDR(ifp));
  805         } else {
  806 
  807                 /*
  808                  * Update link layer address for this port and
  809                  * send notifications to other subsystems.
  810                  */
  811                 lagg_port_lladdr(lp, IF_LLADDR(sc->sc_ifp), LAGG_LLQTYPE_PHYS);
  812         }
  813 
  814         /*
  815          * Insert into the list of ports.
  816          * Keep ports sorted by if_index so that configuration
  817          * is predictable and the same `ifconfig laggN create ...`
  818          * commands lead to the same result each time.
  819          */
  820         SLIST_FOREACH(tlp, &sc->sc_ports, lp_entries) {
  821                 if (tlp->lp_ifp->if_index < ifp->if_index && (
  822                     SLIST_NEXT(tlp, lp_entries) == NULL ||
  823                     SLIST_NEXT(tlp, lp_entries)->lp_ifp->if_index >
  824                     ifp->if_index))
  825                         break;
  826         }
  827         if (tlp != NULL)
  828                 SLIST_INSERT_AFTER(tlp, lp, lp_entries);
  829         else
  830                 SLIST_INSERT_HEAD(&sc->sc_ports, lp, lp_entries);
  831         sc->sc_count++;
  832 
  833         /* Update lagg capabilities */
  834         lagg_capabilities(sc);
  835         lagg_linkstate(sc);
  836 
  837         /* Read port counters */
  838         pval = lp->port_counters.val;
  839         for (i = 0; i < IFCOUNTERS; i++, pval++)
  840                 *pval = ifp->if_get_counter(ifp, i);
  841         /* Add multicast addresses and interface flags to this port */
  842         lagg_ether_cmdmulti(lp, 1);
  843         lagg_setflags(lp, 1);
  844 
  845         if ((error = lagg_proto_addport(sc, lp)) != 0) {
  846                 /* Remove the port, without calling pr_delport. */
  847                 lagg_port_destroy(lp, 0);
  848                 return (error);
  849         }
  850 
  851         return (0);
  852 }
  853 
  854 #ifdef LAGG_PORT_STACKING
  855 static int
  856 lagg_port_checkstacking(struct lagg_softc *sc)
  857 {
  858         struct lagg_softc *sc_ptr;
  859         struct lagg_port *lp;
  860         int m = 0;
  861 
  862         LAGG_WLOCK_ASSERT(sc);
  863 
  864         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
  865                 if (lp->lp_flags & LAGG_PORT_STACK) {
  866                         sc_ptr = (struct lagg_softc *)lp->lp_ifp->if_softc;
  867                         m = MAX(m, lagg_port_checkstacking(sc_ptr));
  868                 }
  869         }
  870 
  871         return (m + 1);
  872 }
  873 #endif
  874 
  875 static int
  876 lagg_port_destroy(struct lagg_port *lp, int rundelport)
  877 {
  878         struct lagg_softc *sc = lp->lp_softc;
  879         struct lagg_port *lp_ptr, *lp0;
  880         struct lagg_llq *llq;
  881         struct ifnet *ifp = lp->lp_ifp;
  882         uint64_t *pval, vdiff;
  883         int i;
  884 
  885         LAGG_WLOCK_ASSERT(sc);
  886 
  887         if (rundelport)
  888                 lagg_proto_delport(sc, lp);
  889 
  890         /*
  891          * Remove multicast addresses and interface flags from this port and
  892          * reset the MAC address; skip this if the interface is being detached.
  893          */
  894         if (!lp->lp_detaching) {
  895                 lagg_ether_cmdmulti(lp, 0);
  896                 lagg_setflags(lp, 0);
  897                 lagg_port_lladdr(lp, lp->lp_lladdr, LAGG_LLQTYPE_PHYS);
  898         }
  899 
  900         /* Restore interface */
  901         ifp->if_type = lp->lp_iftype;
  902         ifp->if_ioctl = lp->lp_ioctl;
  903         ifp->if_output = lp->lp_output;
  904         ifp->if_lagg = NULL;
  905 
  906         /* Update detached port counters */
  907         pval = lp->port_counters.val;
  908         for (i = 0; i < IFCOUNTERS; i++, pval++) {
  909                 vdiff = ifp->if_get_counter(ifp, i) - *pval;
  910                 sc->detached_counters.val[i] += vdiff;
  911         }
  912 
  913         /* Finally, remove the port from the lagg */
  914         SLIST_REMOVE(&sc->sc_ports, lp, lagg_port, lp_entries);
  915         sc->sc_count--;
  916 
  917         /* Update the primary interface */
  918         if (lp == sc->sc_primary) {
  919                 uint8_t lladdr[ETHER_ADDR_LEN];
  920 
  921                 if ((lp0 = SLIST_FIRST(&sc->sc_ports)) == NULL) {
  922                         bzero(&lladdr, ETHER_ADDR_LEN);
  923                 } else {
  924                         bcopy(lp0->lp_lladdr,
  925                             lladdr, ETHER_ADDR_LEN);
  926                 }
  927                 lagg_lladdr(sc, lladdr);
  928 
  929                 /* Mark lp0 as new primary */
  930                 sc->sc_primary = lp0;
  931 
  932                 /*
  933                  * Enqueue lladdr update/notification for each port
  934                  * (the new primary needs an update as well, to switch
  935                  * from the old lladdr to its 'real' one).
  936                  */
  937                 SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries)
  938                         lagg_port_lladdr(lp_ptr, lladdr, LAGG_LLQTYPE_PHYS);
  939         }
  940 
  941         /* Remove any pending lladdr changes from the queue */
  942         if (lp->lp_detaching) {
  943                 SLIST_FOREACH(llq, &sc->sc_llq_head, llq_entries) {
  944                         if (llq->llq_ifp == ifp) {
  945                                 SLIST_REMOVE(&sc->sc_llq_head, llq, lagg_llq,
  946                                     llq_entries);
  947                                 free(llq, M_DEVBUF);
  948                                 break;  /* Only appears once */
  949                         }
  950                 }
  951         }
  952 
  953         if (lp->lp_ifflags)
  954                 if_printf(ifp, "%s: lp_ifflags unclean\n", __func__);
  955 
  956         free(lp, M_DEVBUF);
  957 
  958         /* Update lagg capabilities */
  959         lagg_capabilities(sc);
  960         lagg_linkstate(sc);
  961 
  962         return (0);
  963 }
  964 
  965 static int
  966 lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
  967 {
  968         struct lagg_reqport *rp = (struct lagg_reqport *)data;
  969         struct lagg_softc *sc;
  970         struct lagg_port *lp = NULL;
  971         int error = 0;
  972         struct rm_priotracker tracker;
  973 
  974         /* Should be checked by the caller */
  975         if (ifp->if_type != IFT_IEEE8023ADLAG ||
  976             (lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL)
  977                 goto fallback;
  978 
  979         switch (cmd) {
  980         case SIOCGLAGGPORT:
  981                 if (rp->rp_portname[0] == '\0' ||
  982                     ifunit(rp->rp_portname) != ifp) {
  983                         error = EINVAL;
  984                         break;
  985                 }
  986 
  987                 LAGG_RLOCK(sc, &tracker);
  988                 if ((lp = ifp->if_lagg) == NULL || lp->lp_softc != sc) {
  989                         error = ENOENT;
  990                         LAGG_RUNLOCK(sc, &tracker);
  991                         break;
  992                 }
  993 
  994                 lagg_port2req(lp, rp);
  995                 LAGG_RUNLOCK(sc, &tracker);
  996                 break;
  997 
  998         case SIOCSIFCAP:
  999                 if (lp->lp_ioctl == NULL) {
 1000                         error = EINVAL;
 1001                         break;
 1002                 }
 1003                 error = (*lp->lp_ioctl)(ifp, cmd, data);
 1004                 if (error)
 1005                         break;
 1006 
 1007                 /* Update lagg interface capabilities */
 1008                 LAGG_WLOCK(sc);
 1009                 lagg_capabilities(sc);
 1010                 LAGG_WUNLOCK(sc);
 1011                 break;
 1012 
 1013         case SIOCSIFMTU:
 1014                 /* Do not allow the MTU to be changed once joined */
 1015                 error = EINVAL;
 1016                 break;
 1017 
 1018         default:
 1019                 goto fallback;
 1020         }
 1021 
 1022         return (error);
 1023 
 1024 fallback:
 1025         if (lp != NULL && lp->lp_ioctl != NULL)
 1026                 return ((*lp->lp_ioctl)(ifp, cmd, data));
 1027 
 1028         return (EINVAL);
 1029 }
 1030 
 1031 /*
 1032  * Requests counter @cnt data.
 1033  *
 1034  * The counter value is calculated the following way:
 1035  * 1) for each port, sum the difference between current and "initial" measurements.
 1036  * 2) add the lagg logical interface counters.
 1037  * 3) add the data from the detached_counters array.
 1038  *
 1039  * We also do the following things on port attach/detach:
 1040  * 1) On port attach we store all counters it has into the port_counters array.
 1041  * 2) On port detach we add the difference between the "initial" and
 1042  *    current counter data to the detached_counters array.
 1043  */
 1044 static uint64_t
 1045 lagg_get_counter(struct ifnet *ifp, ift_counter cnt)
 1046 {
 1047         struct lagg_softc *sc;
 1048         struct lagg_port *lp;
 1049         struct ifnet *lpifp;
 1050         struct rm_priotracker tracker;
 1051         uint64_t newval, oldval, vsum;
 1052 
 1053         /* Revise this when we've got non-generic counters. */
 1054         KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
 1055 
 1056         sc = (struct lagg_softc *)ifp->if_softc;
 1057         LAGG_RLOCK(sc, &tracker);
 1058 
 1059         vsum = 0;
 1060         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1061                 /* Saved attached value */
 1062                 oldval = lp->port_counters.val[cnt];
 1063                 /* current value */
 1064                 lpifp = lp->lp_ifp;
 1065                 newval = lpifp->if_get_counter(lpifp, cnt);
 1066                 /* Calculate diff and save new */
 1067                 vsum += newval - oldval;
 1068         }
 1069 
 1070         /*
 1071          * Add counter data which might be added by upper
 1072          * layer protocols operating on the logical interface.
 1073          */
 1074         vsum += if_get_counter_default(ifp, cnt);
 1075 
 1076         /*
 1077          * Add counter data from the detached ports' counters
 1078          */
 1079         vsum += sc->detached_counters.val[cnt];
 1080 
 1081         LAGG_RUNLOCK(sc, &tracker);
 1082 
 1083         return (vsum);
 1084 }
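
/*
 * Worked example (illustrative numbers only): if a port joined when
 * its input-packet counter read 1000 and it now reads 1500, it
 * contributes 500 to the sum above; if it later detaches, that same
 * 500 is folded into detached_counters by lagg_port_destroy(), so the
 * lagg-wide total does not jump when ports come and go.
 */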
 1085 
 1086 /*
 1087  * For direct output to child ports.
 1088  */
 1089 static int
 1090 lagg_port_output(struct ifnet *ifp, struct mbuf *m,
 1091         const struct sockaddr *dst, struct route *ro)
 1092 {
 1093         struct lagg_port *lp = ifp->if_lagg;
 1094 
 1095         switch (dst->sa_family) {
 1096                 case pseudo_AF_HDRCMPLT:
 1097                 case AF_UNSPEC:
 1098                         return ((*lp->lp_output)(ifp, m, dst, ro));
 1099         }
 1100 
 1101         /* drop any other frames */
 1102         m_freem(m);
 1103         return (ENETDOWN);
 1104 }
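
/*
 * Only frames that already carry a complete link-level header reach a
 * port this way: bpf(4) writes use AF_UNSPEC, or pseudo_AF_HDRCMPLT
 * when the header is marked complete.  Everything else is dropped so
 * that regular traffic always flows through the lagg interface.
 */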
 1105 
 1106 static void
 1107 lagg_port_ifdetach(void *arg __unused, struct ifnet *ifp)
 1108 {
 1109         struct lagg_port *lp;
 1110         struct lagg_softc *sc;
 1111 
 1112         if ((lp = ifp->if_lagg) == NULL)
 1113                 return;
 1114         /* If the ifnet is just being renamed, don't do anything. */
 1115         if (ifp->if_flags & IFF_RENAMING)
 1116                 return;
 1117 
 1118         sc = lp->lp_softc;
 1119 
 1120         LAGG_WLOCK(sc);
 1121         lp->lp_detaching = 1;
 1122         lagg_port_destroy(lp, 1);
 1123         LAGG_WUNLOCK(sc);
 1124 }
 1125 
 1126 static void
 1127 lagg_port2req(struct lagg_port *lp, struct lagg_reqport *rp)
 1128 {
 1129         struct lagg_softc *sc = lp->lp_softc;
 1130 
 1131         strlcpy(rp->rp_ifname, sc->sc_ifname, sizeof(rp->rp_ifname));
 1132         strlcpy(rp->rp_portname, lp->lp_ifp->if_xname, sizeof(rp->rp_portname));
 1133         rp->rp_prio = lp->lp_prio;
 1134         rp->rp_flags = lp->lp_flags;
 1135         lagg_proto_portreq(sc, lp, &rp->rp_psc);
 1136 
 1137         /* Add protocol specific flags */
 1138         switch (sc->sc_proto) {
 1139                 case LAGG_PROTO_FAILOVER:
 1140                         if (lp == sc->sc_primary)
 1141                                 rp->rp_flags |= LAGG_PORT_MASTER;
 1142                         if (lp == lagg_link_active(sc, sc->sc_primary))
 1143                                 rp->rp_flags |= LAGG_PORT_ACTIVE;
 1144                         break;
 1145 
 1146                 case LAGG_PROTO_ROUNDROBIN:
 1147                 case LAGG_PROTO_LOADBALANCE:
 1148                 case LAGG_PROTO_BROADCAST:
 1149                         if (LAGG_PORTACTIVE(lp))
 1150                                 rp->rp_flags |= LAGG_PORT_ACTIVE;
 1151                         break;
 1152 
 1153                 case LAGG_PROTO_LACP:
 1154                         /* LACP has a different definition of active */
 1155                         if (lacp_isactive(lp))
 1156                                 rp->rp_flags |= LAGG_PORT_ACTIVE;
 1157                         if (lacp_iscollecting(lp))
 1158                                 rp->rp_flags |= LAGG_PORT_COLLECTING;
 1159                         if (lacp_isdistributing(lp))
 1160                                 rp->rp_flags |= LAGG_PORT_DISTRIBUTING;
 1161                         break;
 1162         }
 1163 
 1164 }
 1165 
 1166 static void
 1167 lagg_init(void *xsc)
 1168 {
 1169         struct lagg_softc *sc = (struct lagg_softc *)xsc;
 1170         struct ifnet *ifp = sc->sc_ifp;
 1171         struct lagg_port *lp;
 1172 
 1173         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
 1174                 return;
 1175 
 1176         LAGG_WLOCK(sc);
 1177 
 1178         ifp->if_drv_flags |= IFF_DRV_RUNNING;
 1179 
 1180         /*
 1181          * Update the port lladdrs if needed.
 1182          * This might be an if_setlladdr() notification
 1183          * that the lladdr has been changed.
 1184          */
 1185         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 1186                 lagg_port_lladdr(lp, IF_LLADDR(ifp), LAGG_LLQTYPE_PHYS);
 1187 
 1188         lagg_proto_init(sc);
 1189 
 1190         LAGG_WUNLOCK(sc);
 1191 }
 1192 
 1193 static void
 1194 lagg_stop(struct lagg_softc *sc)
 1195 {
 1196         struct ifnet *ifp = sc->sc_ifp;
 1197 
 1198         LAGG_WLOCK_ASSERT(sc);
 1199 
 1200         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 1201                 return;
 1202 
 1203         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 1204 
 1205         lagg_proto_stop(sc);
 1206 }
 1207 
 1208 static int
 1209 lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 1210 {
 1211         struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 1212         struct lagg_reqall *ra = (struct lagg_reqall *)data;
 1213         struct lagg_reqopts *ro = (struct lagg_reqopts *)data;
 1214         struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf;
 1215         struct lagg_reqflags *rf = (struct lagg_reqflags *)data;
 1216         struct ifreq *ifr = (struct ifreq *)data;
 1217         struct lagg_port *lp;
 1218         struct ifnet *tpif;
 1219         struct thread *td = curthread;
 1220         char *buf, *outbuf;
 1221         int count, buflen, len, error = 0;
 1222         struct rm_priotracker tracker;
 1223 
 1224         bzero(&rpbuf, sizeof(rpbuf));
 1225 
 1226         switch (cmd) {
 1227         case SIOCGLAGG:
 1228                 LAGG_RLOCK(sc, &tracker);
 1229                 count = 0;
 1230                 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 1231                         count++;
 1232                 buflen = count * sizeof(struct lagg_reqport);
 1233                 LAGG_RUNLOCK(sc, &tracker);
 1234 
 1235                 outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
 1236 
 1237                 LAGG_RLOCK(sc, &tracker);
 1238                 ra->ra_proto = sc->sc_proto;
 1239                 lagg_proto_request(sc, &ra->ra_psc);
 1240                 count = 0;
 1241                 buf = outbuf;
 1242                 len = min(ra->ra_size, buflen);
 1243                 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1244                         if (len < sizeof(rpbuf))
 1245                                 break;
 1246 
 1247                         lagg_port2req(lp, &rpbuf);
 1248                         memcpy(buf, &rpbuf, sizeof(rpbuf));
 1249                         count++;
 1250                         buf += sizeof(rpbuf);
 1251                         len -= sizeof(rpbuf);
 1252                 }
 1253                 LAGG_RUNLOCK(sc, &tracker);
 1254                 ra->ra_ports = count;
 1255                 ra->ra_size = count * sizeof(rpbuf);
 1256                 error = copyout(outbuf, ra->ra_port, ra->ra_size);
 1257                 free(outbuf, M_TEMP);
 1258                 break;
 1259         case SIOCSLAGG:
 1260                 error = priv_check(td, PRIV_NET_LAGG);
 1261                 if (error)
 1262                         break;
 1263                 if (ra->ra_proto >= LAGG_PROTO_MAX) {
 1264                         error = EPROTONOSUPPORT;
 1265                         break;
 1266                 }
 1267 
 1268                 LAGG_WLOCK(sc);
 1269                 lagg_proto_detach(sc);
 1270                 LAGG_UNLOCK_ASSERT(sc);
 1271                 lagg_proto_attach(sc, ra->ra_proto);
 1272                 break;
 1273         case SIOCGLAGGOPTS:
 1274                 ro->ro_opts = sc->sc_opts;
 1275                 if (sc->sc_proto == LAGG_PROTO_LACP) {
 1276                         struct lacp_softc *lsc;
 1277 
 1278                         lsc = (struct lacp_softc *)sc->sc_psc;
 1279                         if (lsc->lsc_debug.lsc_tx_test != 0)
 1280                                 ro->ro_opts |= LAGG_OPT_LACP_TXTEST;
 1281                         if (lsc->lsc_debug.lsc_rx_test != 0)
 1282                                 ro->ro_opts |= LAGG_OPT_LACP_RXTEST;
 1283                         if (lsc->lsc_strict_mode != 0)
 1284                                 ro->ro_opts |= LAGG_OPT_LACP_STRICT;
 1285                         if (lsc->lsc_fast_timeout != 0)
 1286                                 ro->ro_opts |= LAGG_OPT_LACP_TIMEOUT;
 1287 
 1288                         ro->ro_active = sc->sc_active;
 1289                 } else {
 1290                         ro->ro_active = 0;
 1291                         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 1292                                 ro->ro_active += LAGG_PORTACTIVE(lp);
 1293                 }
 1294                 ro->ro_bkt = sc->sc_bkt;
 1295                 ro->ro_flapping = sc->sc_flapping;
 1296                 ro->ro_flowid_shift = sc->flowid_shift;
 1297                 break;
 1298         case SIOCSLAGGOPTS:
 1299                 if (sc->sc_proto == LAGG_PROTO_ROUNDROBIN) {
 1300                         if (ro->ro_bkt == 0)
 1301                                 sc->sc_bkt = 1; /* Minimum 1 packet per iface. */
 1302                         else
 1303                                 sc->sc_bkt = ro->ro_bkt;
 1304                 }
 1305                 error = priv_check(td, PRIV_NET_LAGG);
 1306                 if (error)
 1307                         break;
 1308                 if (ro->ro_opts == 0)
 1309                         break;
 1310                 /*
 1311                  * Set options.  LACP options are stored in sc->sc_psc,
 1312                  * not in sc_opts.
 1313                  */
 1314                 int valid, lacp;
 1315 
 1316                 switch (ro->ro_opts) {
 1317                 case LAGG_OPT_USE_FLOWID:
 1318                 case -LAGG_OPT_USE_FLOWID:
 1319                 case LAGG_OPT_FLOWIDSHIFT:
 1320                         valid = 1;
 1321                         lacp = 0;
 1322                         break;
 1323                 case LAGG_OPT_LACP_TXTEST:
 1324                 case -LAGG_OPT_LACP_TXTEST:
 1325                 case LAGG_OPT_LACP_RXTEST:
 1326                 case -LAGG_OPT_LACP_RXTEST:
 1327                 case LAGG_OPT_LACP_STRICT:
 1328                 case -LAGG_OPT_LACP_STRICT:
 1329                 case LAGG_OPT_LACP_TIMEOUT:
 1330                 case -LAGG_OPT_LACP_TIMEOUT:
 1331                         valid = lacp = 1;
 1332                         break;
 1333                 default:
 1334                         valid = lacp = 0;
 1335                         break;
 1336                 }
 1337 
 1338                 LAGG_WLOCK(sc);
 1339 
 1340                 if (valid == 0 ||
 1341                     (lacp == 1 && sc->sc_proto != LAGG_PROTO_LACP)) {
 1342                         /* Invalid combination of options specified. */
 1343                         error = EINVAL;
 1344                         LAGG_WUNLOCK(sc);
 1345                         break;  /* Return from SIOCSLAGGOPTS. */ 
 1346                 }
 1347                 /*
 1348                  * Store new options into sc->sc_opts except for
 1349                  * FLOWIDSHIFT and LACP options.
 1350                  */
 1351                 if (lacp == 0) {
 1352                         if (ro->ro_opts == LAGG_OPT_FLOWIDSHIFT)
 1353                                 sc->flowid_shift = ro->ro_flowid_shift;
 1354                         else if (ro->ro_opts > 0)
 1355                                 sc->sc_opts |= ro->ro_opts;
 1356                         else
 1357                                 sc->sc_opts &= ~ro->ro_opts;
 1358                 } else {
 1359                         struct lacp_softc *lsc;
 1360                         struct lacp_port *lp;
 1361 
 1362                         lsc = (struct lacp_softc *)sc->sc_psc;
 1363 
 1364                         switch (ro->ro_opts) {
 1365                         case LAGG_OPT_LACP_TXTEST:
 1366                                 lsc->lsc_debug.lsc_tx_test = 1;
 1367                                 break;
 1368                         case -LAGG_OPT_LACP_TXTEST:
 1369                                 lsc->lsc_debug.lsc_tx_test = 0;
 1370                                 break;
 1371                         case LAGG_OPT_LACP_RXTEST:
 1372                                 lsc->lsc_debug.lsc_rx_test = 1;
 1373                                 break;
 1374                         case -LAGG_OPT_LACP_RXTEST:
 1375                                 lsc->lsc_debug.lsc_rx_test = 0;
 1376                                 break;
 1377                         case LAGG_OPT_LACP_STRICT:
 1378                                 lsc->lsc_strict_mode = 1;
 1379                                 break;
 1380                         case -LAGG_OPT_LACP_STRICT:
 1381                                 lsc->lsc_strict_mode = 0;
 1382                                 break;
 1383                         case LAGG_OPT_LACP_TIMEOUT:
 1384                                 LACP_LOCK(lsc);
 1385                                 LIST_FOREACH(lp, &lsc->lsc_ports, lp_next)
 1386                                         lp->lp_state |= LACP_STATE_TIMEOUT;
 1387                                 LACP_UNLOCK(lsc);
 1388                                 lsc->lsc_fast_timeout = 1;
 1389                                 break;
 1390                         case -LAGG_OPT_LACP_TIMEOUT:
 1391                                 LACP_LOCK(lsc);
 1392                                 LIST_FOREACH(lp, &lsc->lsc_ports, lp_next)
 1393                                         lp->lp_state &= ~LACP_STATE_TIMEOUT;
 1394                                 LACP_UNLOCK(lsc);
 1395                                 lsc->lsc_fast_timeout = 0;
 1396                                 break;
 1397                         }
 1398                 }
 1399                 LAGG_WUNLOCK(sc);
 1400                 break;
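        /*
         * The option bits handled above are normally toggled through
         * ifconfig(8), e.g. `ifconfig lagg0 use_flowid` or, for LACP,
         * `ifconfig lagg0 lacp_strict` (a usage sketch; the negative
         * case values encode clearing a bit).
         */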
 1401         case SIOCGLAGGFLAGS:
 1402                 rf->rf_flags = 0;
 1403                 LAGG_RLOCK(sc, &tracker);
 1404                 if (sc->sc_flags & MBUF_HASHFLAG_L2)
 1405                         rf->rf_flags |= LAGG_F_HASHL2;
 1406                 if (sc->sc_flags & MBUF_HASHFLAG_L3)
 1407                         rf->rf_flags |= LAGG_F_HASHL3;
 1408                 if (sc->sc_flags & MBUF_HASHFLAG_L4)
 1409                         rf->rf_flags |= LAGG_F_HASHL4;
 1410                 LAGG_RUNLOCK(sc, &tracker);
 1411                 break;
 1412         case SIOCSLAGGHASH:
 1413                 error = priv_check(td, PRIV_NET_LAGG);
 1414                 if (error)
 1415                         break;
 1416                 if ((rf->rf_flags & LAGG_F_HASHMASK) == 0) {
 1417                         error = EINVAL;
 1418                         break;
 1419                 }
 1420                 LAGG_WLOCK(sc);
 1421                 sc->sc_flags = 0;
 1422                 if (rf->rf_flags & LAGG_F_HASHL2)
 1423                         sc->sc_flags |= MBUF_HASHFLAG_L2;
 1424                 if (rf->rf_flags & LAGG_F_HASHL3)
 1425                         sc->sc_flags |= MBUF_HASHFLAG_L3;
 1426                 if (rf->rf_flags & LAGG_F_HASHL4)
 1427                         sc->sc_flags |= MBUF_HASHFLAG_L4;
 1428                 LAGG_WUNLOCK(sc);
 1429                 break;
 1430         case SIOCGLAGGPORT:
 1431                 if (rp->rp_portname[0] == '\0' ||
 1432                     (tpif = ifunit(rp->rp_portname)) == NULL) {
 1433                         error = EINVAL;
 1434                         break;
 1435                 }
 1436 
 1437                 LAGG_RLOCK(sc, &tracker);
 1438                 if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
 1439                     lp->lp_softc != sc) {
 1440                         error = ENOENT;
 1441                         LAGG_RUNLOCK(sc, &tracker);
 1442                         break;
 1443                 }
 1444 
 1445                 lagg_port2req(lp, rp);
 1446                 LAGG_RUNLOCK(sc, &tracker);
 1447                 break;
 1448         case SIOCSLAGGPORT:
 1449                 error = priv_check(td, PRIV_NET_LAGG);
 1450                 if (error)
 1451                         break;
 1452                 if (rp->rp_portname[0] == '\0' ||
 1453                     (tpif = ifunit(rp->rp_portname)) == NULL) {
 1454                         error = EINVAL;
 1455                         break;
 1456                 }
 1457 #ifdef INET6
 1458                 /*
 1459                  * A lagg port should not carry an inet6 address:
 1460                  * two interfaces with valid link-local scope
 1461                  * zones must never be merged in any form, since
 1462                  * that would violate the link-local scope zone.
 1463                  * Adding a member interface that has inet6
 1464                  * addresses therefore first triggers removal of
 1465                  * all inet6 addresses on the member
 1466                  * interface.
 1467                  */
 1468                 if (in6ifa_llaonifp(tpif)) {
 1469                         in6_ifdetach(tpif);
 1470                         if_printf(sc->sc_ifp,
 1471                             "IPv6 addresses on %s have been removed "
 1472                             "before adding it as a member to prevent "
 1473                             "IPv6 address scope violation.\n",
 1474                             tpif->if_xname);
 1475                 }
 1476 #endif
 1477                 LAGG_WLOCK(sc);
 1478                 error = lagg_port_create(sc, tpif);
 1479                 LAGG_WUNLOCK(sc);
 1480                 break;
 1481         case SIOCSLAGGDELPORT:
 1482                 error = priv_check(td, PRIV_NET_LAGG);
 1483                 if (error)
 1484                         break;
 1485                 if (rp->rp_portname[0] == '\0' ||
 1486                     (tpif = ifunit(rp->rp_portname)) == NULL) {
 1487                         error = EINVAL;
 1488                         break;
 1489                 }
 1490 
 1491                 LAGG_WLOCK(sc);
 1492                 if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
 1493                     lp->lp_softc != sc) {
 1494                         error = ENOENT;
 1495                         LAGG_WUNLOCK(sc);
 1496                         break;
 1497                 }
 1498 
 1499                 error = lagg_port_destroy(lp, 1);
 1500                 LAGG_WUNLOCK(sc);
 1501                 break;
 1502         case SIOCSIFFLAGS:
 1503                 /* Set flags on ports too */
 1504                 LAGG_WLOCK(sc);
 1505                 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1506                         lagg_setflags(lp, 1);
 1507                 }
 1508                 LAGG_WUNLOCK(sc);
 1509 
 1510                 if (!(ifp->if_flags & IFF_UP) &&
 1511                     (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 1512                         /*
 1513                          * If interface is marked down and it is running,
 1514                          * then stop and disable it.
 1515                          */
 1516                         LAGG_WLOCK(sc);
 1517                         lagg_stop(sc);
 1518                         LAGG_WUNLOCK(sc);
 1519                 } else if ((ifp->if_flags & IFF_UP) &&
 1520                     !(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 1521                         /*
 1522                          * If interface is marked up and it is stopped, then
 1523                          * start it.
 1524                          */
 1525                         (*ifp->if_init)(sc);
 1526                 }
 1527                 break;
 1528         case SIOCADDMULTI:
 1529         case SIOCDELMULTI:
 1530                 LAGG_WLOCK(sc);
 1531                 error = lagg_ether_setmulti(sc);
 1532                 LAGG_WUNLOCK(sc);
 1533                 break;
 1534         case SIOCSIFMEDIA:
 1535         case SIOCGIFMEDIA:
 1536                 error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
 1537                 break;
 1538 
 1539         case SIOCSIFCAP:
 1540         case SIOCSIFMTU:
 1541                 /* Do not allow the MTU or caps to be directly changed */
 1542                 error = EINVAL;
 1543                 break;
 1544 
 1545         default:
 1546                 error = ether_ioctl(ifp, cmd, data);
 1547                 break;
 1548         }
 1549         return (error);
 1550 }
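
/*
 * The SIOCSLAGG* ioctls above are normally driven by ifconfig(8); an
 * illustrative sequence (see lagg(4) and ifconfig(8)) would be:
 *
 *      ifconfig lagg0 create
 *      ifconfig lagg0 laggproto lacp laggport em0 laggport em1 up
 *
 * which exercises SIOCSLAGG (protocol) and SIOCSLAGGPORT (members).
 */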
 1551 
 1552 static int
 1553 lagg_ether_setmulti(struct lagg_softc *sc)
 1554 {
 1555         struct lagg_port *lp;
 1556 
 1557         LAGG_WLOCK_ASSERT(sc);
 1558 
 1559         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1560                 /* First, remove any existing filter entries. */
 1561                 lagg_ether_cmdmulti(lp, 0);
 1562                 /* Then copy all addresses from the lagg interface to the port. */
 1563                 lagg_ether_cmdmulti(lp, 1);
 1564         }
 1565         return (0);
 1566 }
 1567 
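/*
 * Program the member port's link-layer multicast filter: with set != 0,
 * mirror every AF_LINK multicast address of the lagg interface onto the
 * port; with set == 0, tear the mirrored entries back down.
 */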
 1568 static int
 1569 lagg_ether_cmdmulti(struct lagg_port *lp, int set)
 1570 {
 1571         struct lagg_softc *sc = lp->lp_softc;
 1572         struct ifnet *ifp = lp->lp_ifp;
 1573         struct ifnet *scifp = sc->sc_ifp;
 1574         struct lagg_mc *mc;
 1575         struct ifmultiaddr *ifma;
 1576         int error;
 1577 
 1578         LAGG_WLOCK_ASSERT(sc);
 1579 
 1580         if (set) {
 1581                 IF_ADDR_WLOCK(scifp);
 1582                 TAILQ_FOREACH(ifma, &scifp->if_multiaddrs, ifma_link) {
 1583                         if (ifma->ifma_addr->sa_family != AF_LINK)
 1584                                 continue;
 1585                         mc = malloc(sizeof(struct lagg_mc), M_DEVBUF, M_NOWAIT);
 1586                         if (mc == NULL) {
 1587                                 IF_ADDR_WUNLOCK(scifp);
 1588                                 return (ENOMEM);
 1589                         }
 1590                         bcopy(ifma->ifma_addr, &mc->mc_addr,
 1591                             ifma->ifma_addr->sa_len);
 1592                         mc->mc_addr.sdl_index = ifp->if_index;
 1593                         mc->mc_ifma = NULL;
 1594                         SLIST_INSERT_HEAD(&lp->lp_mc_head, mc, mc_entries);
 1595                 }
 1596                 IF_ADDR_WUNLOCK(scifp);
 1597                 SLIST_FOREACH (mc, &lp->lp_mc_head, mc_entries) {
 1598                         error = if_addmulti(ifp,
 1599                             (struct sockaddr *)&mc->mc_addr, &mc->mc_ifma);
 1600                         if (error)
 1601                                 return (error);
 1602                 }
 1603         } else {
 1604                 while ((mc = SLIST_FIRST(&lp->lp_mc_head)) != NULL) {
 1605                         SLIST_REMOVE(&lp->lp_mc_head, mc, lagg_mc, mc_entries);
 1606                         if (mc->mc_ifma && !lp->lp_detaching)
 1607                                 if_delmulti_ifma(mc->mc_ifma);
 1608                         free(mc, M_DEVBUF);
 1609                 }
 1610         }
 1611         return (0);
 1612 }
 1613 
 1614 /* Handle a ref-counted flag that should be set on the lagg port as well. */
 1615 static int
 1616 lagg_setflag(struct lagg_port *lp, int flag, int status,
 1617     int (*func)(struct ifnet *, int))
 1618 {
 1619         struct lagg_softc *sc = lp->lp_softc;
 1620         struct ifnet *scifp = sc->sc_ifp;
 1621         struct ifnet *ifp = lp->lp_ifp;
 1622         int error;
 1623 
 1624         LAGG_WLOCK_ASSERT(sc);
 1625 
 1626         status = status ? (scifp->if_flags & flag) : 0;
 1627         /* Now "status" contains the flag value or 0 */
 1628 
 1629         /*
 1630          * See if the recorded port status differs from what we
 1631          * want it to be.  If it does, flip it.  We record the
 1632          * port's status in lp_ifflags so that we never clear a
 1633          * port flag we did not set ourselves.  In fact, we do
 1634          * not set or clear port flags directly; we acquire or
 1635          * release references to them, which keeps the recorded
 1636          * flags in accord with the port's actual flags.
 1637          */
 1638         if (status != (lp->lp_ifflags & flag)) {
 1639                 error = (*func)(ifp, status);
 1640                 if (error)
 1641                         return (error);
 1642                 lp->lp_ifflags &= ~flag;
 1643                 lp->lp_ifflags |= status;
 1644         }
 1645         return (0);
 1646 }
 1647 
 1648 /*
 1649  * Handle IFF_* flags that require certain changes on the lagg port:
 1650  * if "status" is true, update the port's flags to match the lagg;
 1651  * if "status" is false, forcibly clear the flags set on the port.
 1652  */
 1653 static int
 1654 lagg_setflags(struct lagg_port *lp, int status)
 1655 {
 1656         int error, i;
 1657 
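              /*
               * Walk the lagg_pflags table (defined near the top of this
               * file), which pairs each propagated IFF_* flag with the
               * function used to toggle it on a port, and apply each
               * entry in turn.
               */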
 1658         for (i = 0; lagg_pflags[i].flag; i++) {
 1659                 error = lagg_setflag(lp, lagg_pflags[i].flag,
 1660                     status, lagg_pflags[i].func);
 1661                 if (error)
 1662                         return (error);
 1663         }
 1664         return (0);
 1665 }
 1666 
 1667 static int
 1668 lagg_transmit(struct ifnet *ifp, struct mbuf *m)
 1669 {
 1670         struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 1671         int error, len, mcast;
 1672         struct rm_priotracker tracker;
 1673 
 1674         len = m->m_pkthdr.len;
 1675         mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
 1676 
 1677         LAGG_RLOCK(sc, &tracker);
 1678         /* We need a Tx algorithm and at least one port */
 1679         if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
 1680                 LAGG_RUNLOCK(sc, &tracker);
 1681                 m_freem(m);
 1682                 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 1683                 return (ENXIO);
 1684         }
 1685 
 1686         ETHER_BPF_MTAP(ifp, m);
 1687 
 1688         error = lagg_proto_start(sc, m);
 1689         LAGG_RUNLOCK(sc, &tracker);
 1690 
 1691         if (error != 0)
 1692                 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 1693 
 1694         return (error);
 1695 }
 1696 
 1697 /*
 1698  * The ifp->if_qflush entry point for lagg(4) is a no-op.
 1699  */
 1700 static void
 1701 lagg_qflush(struct ifnet *ifp __unused)
 1702 {
 1703 }
 1704 
 1705 static struct mbuf *
 1706 lagg_input(struct ifnet *ifp, struct mbuf *m)
 1707 {
 1708         struct lagg_port *lp = ifp->if_lagg;
 1709         struct lagg_softc *sc = lp->lp_softc;
 1710         struct ifnet *scifp = sc->sc_ifp;
 1711         struct rm_priotracker tracker;
 1712 
 1713         LAGG_RLOCK(sc, &tracker);
 1714         if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
 1715             (lp->lp_flags & LAGG_PORT_DISABLED) ||
 1716             sc->sc_proto == LAGG_PROTO_NONE) {
 1717                 LAGG_RUNLOCK(sc, &tracker);
 1718                 m_freem(m);
 1719                 return (NULL);
 1720         }
 1721 
 1722         ETHER_BPF_MTAP(scifp, m);
 1723 
 1724         if (lp->lp_detaching != 0) {
 1725                 m_freem(m);
 1726                 m = NULL;
 1727         } else
 1728                 m = lagg_proto_input(sc, lp, m);
 1729 
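              /*
               * In IFF_MONITOR mode the lagg only feeds BPF (via the mtap
               * above); the frame itself is discarded rather than passed up.
               */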
 1730         if (m != NULL) {
 1731                 if (scifp->if_flags & IFF_MONITOR) {
 1732                         m_freem(m);
 1733                         m = NULL;
 1734                 }
 1735         }
 1736 
 1737         LAGG_RUNLOCK(sc, &tracker);
 1738         return (m);
 1739 }
 1740 
 1741 static int
 1742 lagg_media_change(struct ifnet *ifp)
 1743 {
 1744         struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 1745 
 1746         if (sc->sc_ifflags & IFF_DEBUG)
 1747                 printf("%s\n", __func__);
 1748 
 1749         /* Ignore */
 1750         return (0);
 1751 }
 1752 
 1753 static void
 1754 lagg_media_status(struct ifnet *ifp, struct ifmediareq *imr)
 1755 {
 1756         struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 1757         struct lagg_port *lp;
 1758         struct rm_priotracker tracker;
 1759 
 1760         imr->ifm_status = IFM_AVALID;
 1761         imr->ifm_active = IFM_ETHER | IFM_AUTO;
 1762 
 1763         LAGG_RLOCK(sc, &tracker);
 1764         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1765                 if (LAGG_PORTACTIVE(lp))
 1766                         imr->ifm_status |= IFM_ACTIVE;
 1767         }
 1768         LAGG_RUNLOCK(sc, &tracker);
 1769 }
 1770 
 1771 static void
 1772 lagg_linkstate(struct lagg_softc *sc)
 1773 {
 1774         struct lagg_port *lp;
 1775         int new_link = LINK_STATE_DOWN;
 1776         uint64_t speed;
 1777 
 1778         /* Our link is considered up if at least one of our ports is active */
 1779         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1780                 if (lp->lp_ifp->if_link_state == LINK_STATE_UP) {
 1781                         new_link = LINK_STATE_UP;
 1782                         break;
 1783                 }
 1784         }
 1785         if_link_state_change(sc->sc_ifp, new_link);
 1786 
 1787         /* Update if_baudrate to reflect the max possible speed */
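              /*
               * For the aggregating protocols this is additive: e.g. a
               * loadbalance lagg over two 1 Gb/s ports advertises 2 Gb/s,
               * while failover advertises only the primary port's speed.
               */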
 1788         switch (sc->sc_proto) {
 1789                 case LAGG_PROTO_FAILOVER:
 1790                         sc->sc_ifp->if_baudrate = sc->sc_primary != NULL ?
 1791                             sc->sc_primary->lp_ifp->if_baudrate : 0;
 1792                         break;
 1793                 case LAGG_PROTO_ROUNDROBIN:
 1794                 case LAGG_PROTO_LOADBALANCE:
 1795                 case LAGG_PROTO_BROADCAST:
 1796                         speed = 0;
 1797                         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 1798                                 speed += lp->lp_ifp->if_baudrate;
 1799                         sc->sc_ifp->if_baudrate = speed;
 1800                         break;
 1801                 case LAGG_PROTO_LACP:
 1802                         /* LACP updates if_baudrate itself */
 1803                         break;
 1804         }
 1805 }
 1806 
 1807 static void
 1808 lagg_port_state(struct ifnet *ifp, int state)
 1809 {
 1810         struct lagg_port *lp = (struct lagg_port *)ifp->if_lagg;
 1811         struct lagg_softc *sc = NULL;
 1812 
 1813         if (lp != NULL)
 1814                 sc = lp->lp_softc;
 1815         if (sc == NULL)
 1816                 return;
 1817 
 1818         LAGG_WLOCK(sc);
 1819         lagg_linkstate(sc);
 1820         lagg_proto_linkstate(sc, lp);
 1821         LAGG_WUNLOCK(sc);
 1822 }
 1823 
 1824 struct lagg_port *
 1825 lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp)
 1826 {
 1827         struct lagg_port *lp_next, *rval = NULL;
 1828         /* int new_link = LINK_STATE_DOWN; */
 1829 
 1830         LAGG_RLOCK_ASSERT(sc);
 1831         /*
 1832          * Search for a port that reports an active link state.
 1833          */
 1834 
 1835         if (lp == NULL)
 1836                 goto search;
 1837         if (LAGG_PORTACTIVE(lp)) {
 1838                 rval = lp;
 1839                 goto found;
 1840         }
 1841         if ((lp_next = SLIST_NEXT(lp, lp_entries)) != NULL &&
 1842             LAGG_PORTACTIVE(lp_next)) {
 1843                 rval = lp_next;
 1844                 goto found;
 1845         }
 1846 
 1847 search:
 1848         SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
 1849                 if (LAGG_PORTACTIVE(lp_next)) {
 1850                         rval = lp_next;
 1851                         goto found;
 1852                 }
 1853         }
 1854 
 1855 found:
 1856         if (rval != NULL) {
 1857                 /*
 1858                  * The IEEE 802.1D standard assumes that a lagg with
 1859                  * multiple ports is always full duplex. This is valid
 1860                  * for load sharing laggs and if at least two links
 1861                  * are active. Unfortunately, checking the latter would
 1862                  * be too expensive at this point.
 1863                  XXX
 1864                 if ((sc->sc_capabilities & IFCAP_LAGG_FULLDUPLEX) &&
 1865                     (sc->sc_count > 1))
 1866                         new_link = LINK_STATE_FULL_DUPLEX;
 1867                 else
 1868                         new_link = rval->lp_link_state;
 1869                  */
 1870         }
 1871 
 1872         return (rval);
 1873 }
 1874 
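/*
 * Hand an mbuf directly to a member port's if_transmit routine.
 */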
 1875 int
 1876 lagg_enqueue(struct ifnet *ifp, struct mbuf *m)
 1877 {
 1878 
 1879         return (ifp->if_transmit)(ifp, m);
 1880 }
 1881 
 1882 /*
 1883  * Simple round robin aggregation
 1884  */
 1885 static void
 1886 lagg_rr_attach(struct lagg_softc *sc)
 1887 {
 1888         sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX;
 1889         sc->sc_seq = 0;
 1890         sc->sc_bkt_count = sc->sc_bkt;
 1891 }
 1892 
 1893 static int
 1894 lagg_rr_start(struct lagg_softc *sc, struct mbuf *m)
 1895 {
 1896         struct lagg_port *lp;
 1897         uint32_t p;
 1898 
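              /*
               * sc_bkt ("bucket") batches transmission: when it is non-zero,
               * the same port is reused for sc_bkt consecutive packets
               * before the sequence number advances, so with sc_bkt == 2
               * over ports A and B the pattern is A A B B A A ...  With
               * sc_bkt == 0, every packet advances to the next port.
               */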
 1899         if (sc->sc_bkt_count == 0 && sc->sc_bkt > 0)
 1900                 sc->sc_bkt_count = sc->sc_bkt;
 1901 
 1902         if (sc->sc_bkt > 0) {
 1903                 atomic_subtract_int(&sc->sc_bkt_count, 1);
 1904                 if (atomic_cmpset_int(&sc->sc_bkt_count, 0, sc->sc_bkt))
 1905                         p = atomic_fetchadd_32(&sc->sc_seq, 1);
 1906                 else
 1907                         p = sc->sc_seq;
 1908         } else
 1909                 p = atomic_fetchadd_32(&sc->sc_seq, 1);
 1910 
 1911         p %= sc->sc_count;
 1912         lp = SLIST_FIRST(&sc->sc_ports);
 1913 
 1914         while (p--)
 1915                 lp = SLIST_NEXT(lp, lp_entries);
 1916 
 1917         /*
 1918          * Check the port's link state. lagg_link_active() returns the
 1919          * next active port if this link is down, or NULL if none is active.
 1920          */
 1921         if ((lp = lagg_link_active(sc, lp)) == NULL) {
 1922                 m_freem(m);
 1923                 return (ENETDOWN);
 1924         }
 1925 
 1926         /* Send mbuf */
 1927         return (lagg_enqueue(lp->lp_ifp, m));
 1928 }
 1929 
 1930 static struct mbuf *
 1931 lagg_rr_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
 1932 {
 1933         struct ifnet *ifp = sc->sc_ifp;
 1934 
 1935         /* Just pass in the packet to our lagg device */
 1936         m->m_pkthdr.rcvif = ifp;
 1937 
 1938         return (m);
 1939 }
 1940 
 1941 /*
 1942  * Broadcast mode
 1943  */
 1944 static int
 1945 lagg_bcast_start(struct lagg_softc *sc, struct mbuf *m)
 1946 {
 1947         int active_ports = 0;
 1948         int errors = 0;
 1949         int ret;
 1950         struct lagg_port *lp, *last = NULL;
 1951         struct mbuf *m0;
 1952 
 1953         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1954                 if (!LAGG_PORTACTIVE(lp))
 1955                         continue;
 1956 
 1957                 active_ports++;
 1958 
 1959                 if (last != NULL) {
 1960                         m0 = m_copym(m, 0, M_COPYALL, M_NOWAIT);
 1961                         if (m0 == NULL) {
 1962                                 ret = ENOBUFS;
 1963                                 errors++;
 1964                                 break;
 1965                         }
 1966 
 1967                         ret = lagg_enqueue(last->lp_ifp, m0);
 1968                         if (ret != 0)
 1969                                 errors++;
 1970                 }
 1971                 last = lp;
 1972         }
 1973         if (last == NULL) {
 1974                 m_freem(m);
 1975                 return (ENOENT);
 1976         }
 1977         if ((last = lagg_link_active(sc, last)) == NULL) {
 1978                 m_freem(m);
 1979                 return (ENETDOWN);
 1980         }
 1981 
 1982         ret = lagg_enqueue(last->lp_ifp, m);
 1983         if (ret != 0)
 1984                 errors++;
 1985 
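              /*
               * Note that per-port failures only bump "errors"; once at
               * least one copy has been attempted the function returns 0,
               * so partial transmit errors are not propagated to the caller.
               */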
 1986         if (errors == 0)
 1987                 return (ret);
 1988 
 1989         return (0);
 1990 }
 1991 
 1992 static struct mbuf *
 1993 lagg_bcast_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
 1994 {
 1995         struct ifnet *ifp = sc->sc_ifp;
 1996 
 1997         /* Just pass in the packet to our lagg device */
 1998         m->m_pkthdr.rcvif = ifp;
 1999         return (m);
 2000 }
 2001 
 2002 /*
 2003  * Active failover
 2004  */
 2005 static int
 2006 lagg_fail_start(struct lagg_softc *sc, struct mbuf *m)
 2007 {
 2008         struct lagg_port *lp;
 2009 
 2010         /* Use the master port if active or the next available port */
 2011         if ((lp = lagg_link_active(sc, sc->sc_primary)) == NULL) {
 2012                 m_freem(m);
 2013                 return (ENETDOWN);
 2014         }
 2015 
 2016         /* Send mbuf */
 2017         return (lagg_enqueue(lp->lp_ifp, m));
 2018 }
 2019 
 2020 static struct mbuf *
 2021 lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
 2022 {
 2023         struct ifnet *ifp = sc->sc_ifp;
 2024         struct lagg_port *tmp_tp;
 2025 
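              /*
               * By default only traffic received on the primary port is
               * accepted; the net.link.lagg.failover_rx_all sysctl
               * (V_lagg_failover_rx_all) accepts traffic on any port.
               */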
 2026         if (lp == sc->sc_primary || V_lagg_failover_rx_all) {
 2027                 m->m_pkthdr.rcvif = ifp;
 2028                 return (m);
 2029         }
 2030 
 2031         if (!LAGG_PORTACTIVE(sc->sc_primary)) {
 2032                 tmp_tp = lagg_link_active(sc, sc->sc_primary);
 2033                 /*
 2034                  * If tmp_tp is null, we've received a packet when all
 2035                  * our links are down. Weird, but process it anyway.
 2036                  */
 2037                 if (tmp_tp == NULL || tmp_tp == lp) {
 2038                         m->m_pkthdr.rcvif = ifp;
 2039                         return (m);
 2040                 }
 2041         }
 2042 
 2043         m_freem(m);
 2044         return (NULL);
 2045 }
 2046 
 2047 /*
 2048  * Loadbalancing
 2049  */
 2050 static void
 2051 lagg_lb_attach(struct lagg_softc *sc)
 2052 {
 2053         struct lagg_port *lp;
 2054         struct lagg_lb *lb;
 2055 
 2056         lb = malloc(sizeof(struct lagg_lb), M_DEVBUF, M_WAITOK | M_ZERO);
 2057 
 2058         sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX;
 2059 
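              /*
               * lb_key seeds m_ether_tcpip_hash() (presumably with a random
               * value) so that the port chosen for a given flow differs
               * between laggs and across reboots.
               */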
 2060         lb->lb_key = m_ether_tcpip_hash_init();
 2061         sc->sc_psc = lb;
 2062 
 2063         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 2064                 lagg_lb_port_create(lp);
 2065 }
 2066 
 2067 static void
 2068 lagg_lb_detach(struct lagg_softc *sc)
 2069 {
 2070         struct lagg_lb *lb;
 2071 
 2072         lb = (struct lagg_lb *)sc->sc_psc;
 2073         LAGG_WUNLOCK(sc);
 2074         if (lb != NULL)
 2075                 free(lb, M_DEVBUF);
 2076 }
 2077 
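/*
 * Rebuild the load-balance port table from the current port list,
 * skipping lp; passing the port being removed as lp keeps it out of
 * the new table.
 */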
 2078 static int
 2079 lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp)
 2080 {
 2081         struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
 2082         struct lagg_port *lp_next;
 2083         int i = 0;
 2084 
 2085         bzero(&lb->lb_ports, sizeof(lb->lb_ports));
 2086         SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
 2087                 if (lp_next == lp)
 2088                         continue;
 2089                 if (i >= LAGG_MAX_PORTS)
 2090                         return (EINVAL);
 2091                 if (sc->sc_ifflags & IFF_DEBUG)
 2092                         printf("%s: port %s at index %d\n",
 2093                             sc->sc_ifname, lp_next->lp_ifp->if_xname, i);
 2094                 lb->lb_ports[i++] = lp_next;
 2095         }
 2096 
 2097         return (0);
 2098 }
 2099 
 2100 static int
 2101 lagg_lb_port_create(struct lagg_port *lp)
 2102 {
 2103         struct lagg_softc *sc = lp->lp_softc;
 2104         return (lagg_lb_porttable(sc, NULL));
 2105 }
 2106 
 2107 static void
 2108 lagg_lb_port_destroy(struct lagg_port *lp)
 2109 {
 2110         struct lagg_softc *sc = lp->lp_softc;
 2111         lagg_lb_porttable(sc, lp);
 2112 }
 2113 
 2114 static int
 2115 lagg_lb_start(struct lagg_softc *sc, struct mbuf *m)
 2116 {
 2117         struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
 2118         struct lagg_port *lp = NULL;
 2119         uint32_t p = 0;
 2120 
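              /*
               * Prefer the flow ID computed by the hardware (shifted right
               * by flowid_shift) when LAGG_OPT_USE_FLOWID is set and the
               * mbuf carries one; otherwise hash the Ethernet/IP/TCP
               * headers in software.
               */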
 2121         if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) &&
 2122             M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
 2123                 p = m->m_pkthdr.flowid >> sc->flowid_shift;
 2124         else
 2125                 p = m_ether_tcpip_hash(sc->sc_flags, m, lb->lb_key);
 2126         p %= sc->sc_count;
 2127         lp = lb->lb_ports[p];
 2128 
 2129         /*
 2130          * Check the port's link state. lagg_link_active() returns the
 2131          * next active port if this link is down, or NULL if none is active.
 2132          */
 2133         if ((lp = lagg_link_active(sc, lp)) == NULL) {
 2134                 m_freem(m);
 2135                 return (ENETDOWN);
 2136         }
 2137 
 2138         /* Send mbuf */
 2139         return (lagg_enqueue(lp->lp_ifp, m));
 2140 }
 2141 
 2142 static struct mbuf *
 2143 lagg_lb_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
 2144 {
 2145         struct ifnet *ifp = sc->sc_ifp;
 2146 
 2147         /* Just pass in the packet to our lagg device */
 2148         m->m_pkthdr.rcvif = ifp;
 2149 
 2150         return (m);
 2151 }
 2152 
 2153 /*
 2154  * 802.3ad LACP
 2155  */
 2156 static void
 2157 lagg_lacp_attach(struct lagg_softc *sc)
 2158 {
 2159         struct lagg_port *lp;
 2160 
 2161         lacp_attach(sc);
 2162         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 2163                 lacp_port_create(lp);
 2164 }
 2165 
 2166 static void
 2167 lagg_lacp_detach(struct lagg_softc *sc)
 2168 {
 2169         struct lagg_port *lp;
 2170         void *psc;
 2171 
 2172         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 2173                 lacp_port_destroy(lp);
 2174 
 2175         psc = sc->sc_psc;
 2176         sc->sc_psc = NULL;
 2177         LAGG_WUNLOCK(sc);
 2178 
 2179         lacp_detach(psc);
 2180 }
 2181 
 2182 static void
 2183 lagg_lacp_lladdr(struct lagg_softc *sc)
 2184 {
 2185         struct lagg_port *lp;
 2186 
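              /*
               * The LACP actor system ID is derived from the lagg's
               * link-layer address, so on an lladdr change each port is
               * destroyed and recreated, presumably so lacp_port_create()
               * picks up the new ID.
               */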
 2187         /* purge all the lacp ports */
 2188         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 2189                 lacp_port_destroy(lp);
 2190 
 2191         /* add them back in */
 2192         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 2193                 lacp_port_create(lp);
 2194 }
 2195 
 2196 static int
 2197 lagg_lacp_start(struct lagg_softc *sc, struct mbuf *m)
 2198 {
 2199         struct lagg_port *lp;
 2200 
 2201         lp = lacp_select_tx_port(sc, m);
 2202         if (lp == NULL) {
 2203                 m_freem(m);
 2204                 return (ENETDOWN);
 2205         }
 2206 
 2207         /* Send mbuf */
 2208         return (lagg_enqueue(lp->lp_ifp, m));
 2209 }
 2210 
 2211 static struct mbuf *
 2212 lagg_lacp_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
 2213 {
 2214         struct ifnet *ifp = sc->sc_ifp;
 2215         struct ether_header *eh;
 2216         u_short etype;
 2217 
 2218         eh = mtod(m, struct ether_header *);
 2219         etype = ntohs(eh->ether_type);
 2220 
 2221         /* Tap off LACP control messages */
 2222         if ((m->m_flags & M_VLANTAG) == 0 && etype == ETHERTYPE_SLOW) {
 2223                 m = lacp_input(lp, m);
 2224                 if (m == NULL)
 2225                         return (NULL);
 2226         }
 2227 
 2228         /*
 2229          * If the port is not collecting or not in the active aggregator then
 2230          * free and return.
 2231          */
 2232         if (lacp_iscollecting(lp) == 0 || lacp_isactive(lp) == 0) {
 2233                 m_freem(m);
 2234                 return (NULL);
 2235         }
 2236 
 2237         m->m_pkthdr.rcvif = ifp;
 2238         return (m);
 2239 }
 2240 
