FreeBSD/Linux Kernel Cross Reference
sys/net/if_lagg.c


    1 /*      $OpenBSD: if_trunk.c,v 1.30 2007/01/31 06:20:19 reyk Exp $      */
    2 
    3 /*
    4  * Copyright (c) 2005, 2006 Reyk Floeter <reyk@openbsd.org>
    5  * Copyright (c) 2007 Andrew Thompson <thompsa@FreeBSD.org>
    6  * Copyright (c) 2014, 2016 Marcelo Araujo <araujo@FreeBSD.org>
    7  *
    8  * Permission to use, copy, modify, and distribute this software for any
    9  * purpose with or without fee is hereby granted, provided that the above
   10  * copyright notice and this permission notice appear in all copies.
   11  *
   12  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
   13  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
   14  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
   15  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
   16  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
   17  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
   18  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
   19  */
   20 
   21 #include <sys/cdefs.h>
   22 __FBSDID("$FreeBSD$");
   23 
   24 #include "opt_inet.h"
   25 #include "opt_inet6.h"
   26 #include "opt_ratelimit.h"
   27 
   28 #include <sys/param.h>
   29 #include <sys/kernel.h>
   30 #include <sys/malloc.h>
   31 #include <sys/mbuf.h>
   32 #include <sys/queue.h>
   33 #include <sys/socket.h>
   34 #include <sys/sockio.h>
   35 #include <sys/sysctl.h>
   36 #include <sys/module.h>
   37 #include <sys/priv.h>
   38 #include <sys/systm.h>
   39 #include <sys/proc.h>
   40 #include <sys/lock.h>
   41 #include <sys/rmlock.h>
   42 #include <sys/sx.h>
   43 #include <sys/taskqueue.h>
   44 #include <sys/eventhandler.h>
   45 
   46 #include <net/ethernet.h>
   47 #include <net/if.h>
   48 #include <net/if_clone.h>
   49 #include <net/if_arp.h>
   50 #include <net/if_dl.h>
   51 #include <net/if_media.h>
   52 #include <net/if_types.h>
   53 #include <net/if_var.h>
   54 #include <net/bpf.h>
   55 #include <net/route.h>
   56 #include <net/vnet.h>
   57 #include <net/infiniband.h>
   58 
   59 #if defined(INET) || defined(INET6)
   60 #include <netinet/in.h>
   61 #include <netinet/ip.h>
   62 #endif
   63 #ifdef INET
   64 #include <netinet/in_systm.h>
   65 #include <netinet/if_ether.h>
   66 #endif
   67 
   68 #ifdef INET6
   69 #include <netinet/ip6.h>
   70 #include <netinet6/in6_var.h>
   71 #include <netinet6/in6_ifattach.h>
   72 #endif
   73 
   74 #include <net/if_vlan_var.h>
   75 #include <net/if_lagg.h>
   76 #include <net/ieee8023ad_lacp.h>
   77 
   78 #ifdef INET6
   79 /*
    80  * XXX: declared here to avoid including many inet6-related files.
    81  * Should this be more generalized?
   82  */
   83 extern void     nd6_setmtu(struct ifnet *);
   84 #endif
   85 
   86 #define LAGG_RLOCK()    struct epoch_tracker lagg_et; epoch_enter_preempt(net_epoch_preempt, &lagg_et)
   87 #define LAGG_RUNLOCK()  epoch_exit_preempt(net_epoch_preempt, &lagg_et)
   88 #define LAGG_RLOCK_ASSERT()     MPASS(in_epoch(net_epoch_preempt))
   89 #define LAGG_UNLOCK_ASSERT()    MPASS(!in_epoch(net_epoch_preempt))
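
These wrappers are thin veneers over FreeBSD's network epoch: readers enter the preemptible net epoch for the duration of a port-list traversal instead of taking a conventional read lock, while writers serialize on the per-softc sx lock defined just below. A minimal sketch of the reader pattern the macros wrap (the function name is hypothetical; the traversal is the one used throughout this file):

    static void
    example_reader(struct lagg_softc *sc)
    {
            struct lagg_port *lp;

            LAGG_RLOCK();           /* declares the tracker, enters the epoch */
            CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
                    /* read-only use of lp; do not sleep in here */
            }
            LAGG_RUNLOCK();         /* exits the epoch */
    }

Note that LAGG_RLOCK() declares the epoch_tracker it uses, so the pair must appear in the same lexical scope.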
   90 
   91 #define LAGG_SX_INIT(_sc)       sx_init(&(_sc)->sc_sx, "if_lagg sx")
   92 #define LAGG_SX_DESTROY(_sc)    sx_destroy(&(_sc)->sc_sx)
   93 #define LAGG_XLOCK(_sc)         sx_xlock(&(_sc)->sc_sx)
   94 #define LAGG_XUNLOCK(_sc)       sx_xunlock(&(_sc)->sc_sx)
   95 #define LAGG_SXLOCK_ASSERT(_sc) sx_assert(&(_sc)->sc_sx, SA_LOCKED)
   96 #define LAGG_XLOCK_ASSERT(_sc)  sx_assert(&(_sc)->sc_sx, SA_XLOCKED)
   97 
   98 /* Special flags we should propagate to the lagg ports. */
   99 static struct {
  100         int flag;
  101         int (*func)(struct ifnet *, int);
  102 } lagg_pflags[] = {
  103         {IFF_PROMISC, ifpromisc},
  104         {IFF_ALLMULTI, if_allmulti},
  105         {0, NULL}
  106 };
  107 
  108 VNET_DEFINE(SLIST_HEAD(__trhead, lagg_softc), lagg_list); /* list of laggs */
  109 #define V_lagg_list     VNET(lagg_list)
  110 VNET_DEFINE_STATIC(struct mtx, lagg_list_mtx);
  111 #define V_lagg_list_mtx VNET(lagg_list_mtx)
  112 #define LAGG_LIST_LOCK_INIT(x)          mtx_init(&V_lagg_list_mtx, \
  113                                         "if_lagg list", NULL, MTX_DEF)
  114 #define LAGG_LIST_LOCK_DESTROY(x)       mtx_destroy(&V_lagg_list_mtx)
  115 #define LAGG_LIST_LOCK(x)               mtx_lock(&V_lagg_list_mtx)
  116 #define LAGG_LIST_UNLOCK(x)             mtx_unlock(&V_lagg_list_mtx)
  117 eventhandler_tag        lagg_detach_cookie = NULL;
  118 
  119 static int      lagg_clone_create(struct if_clone *, int, caddr_t);
  120 static void     lagg_clone_destroy(struct ifnet *);
  121 VNET_DEFINE_STATIC(struct if_clone *, lagg_cloner);
  122 #define V_lagg_cloner   VNET(lagg_cloner)
  123 static const char laggname[] = "lagg";
  124 static MALLOC_DEFINE(M_LAGG, laggname, "802.3AD Link Aggregation Interface");
  125 
  126 static void     lagg_capabilities(struct lagg_softc *);
  127 static int      lagg_port_create(struct lagg_softc *, struct ifnet *);
  128 static int      lagg_port_destroy(struct lagg_port *, int);
  129 static struct mbuf *lagg_input_ethernet(struct ifnet *, struct mbuf *);
  130 static struct mbuf *lagg_input_infiniband(struct ifnet *, struct mbuf *);
  131 static void     lagg_linkstate(struct lagg_softc *);
  132 static void     lagg_port_state(struct ifnet *, int);
  133 static int      lagg_port_ioctl(struct ifnet *, u_long, caddr_t);
  134 static int      lagg_port_output(struct ifnet *, struct mbuf *,
  135                     const struct sockaddr *, struct route *);
  136 static void     lagg_port_ifdetach(void *arg __unused, struct ifnet *);
  137 #ifdef LAGG_PORT_STACKING
  138 static int      lagg_port_checkstacking(struct lagg_softc *);
  139 #endif
  140 static void     lagg_port2req(struct lagg_port *, struct lagg_reqport *);
  141 static void     lagg_init(void *);
  142 static void     lagg_stop(struct lagg_softc *);
  143 static int      lagg_ioctl(struct ifnet *, u_long, caddr_t);
  144 #ifdef RATELIMIT
  145 static int      lagg_snd_tag_alloc(struct ifnet *,
  146                     union if_snd_tag_alloc_params *,
  147                     struct m_snd_tag **);
  148 #endif
  149 static int      lagg_setmulti(struct lagg_port *);
  150 static int      lagg_clrmulti(struct lagg_port *);
  151 static  int     lagg_setcaps(struct lagg_port *, int cap);
  152 static  int     lagg_setflag(struct lagg_port *, int, int,
  153                     int (*func)(struct ifnet *, int));
  154 static  int     lagg_setflags(struct lagg_port *, int status);
  155 static uint64_t lagg_get_counter(struct ifnet *ifp, ift_counter cnt);
  156 static int      lagg_transmit_ethernet(struct ifnet *, struct mbuf *);
  157 static int      lagg_transmit_infiniband(struct ifnet *, struct mbuf *);
  158 static void     lagg_qflush(struct ifnet *);
  159 static int      lagg_media_change(struct ifnet *);
  160 static void     lagg_media_status(struct ifnet *, struct ifmediareq *);
  161 static struct lagg_port *lagg_link_active(struct lagg_softc *,
  162             struct lagg_port *);
  163 
  164 /* Simple round robin */
  165 static void     lagg_rr_attach(struct lagg_softc *);
  166 static int      lagg_rr_start(struct lagg_softc *, struct mbuf *);
  167 static struct mbuf *lagg_rr_input(struct lagg_softc *, struct lagg_port *,
  168                     struct mbuf *);
  169 
  170 /* Active failover */
  171 static int      lagg_fail_start(struct lagg_softc *, struct mbuf *);
  172 static struct mbuf *lagg_fail_input(struct lagg_softc *, struct lagg_port *,
  173                     struct mbuf *);
  174 
  175 /* Loadbalancing */
  176 static void     lagg_lb_attach(struct lagg_softc *);
  177 static void     lagg_lb_detach(struct lagg_softc *);
  178 static int      lagg_lb_port_create(struct lagg_port *);
  179 static void     lagg_lb_port_destroy(struct lagg_port *);
  180 static int      lagg_lb_start(struct lagg_softc *, struct mbuf *);
  181 static struct mbuf *lagg_lb_input(struct lagg_softc *, struct lagg_port *,
  182                     struct mbuf *);
  183 static int      lagg_lb_porttable(struct lagg_softc *, struct lagg_port *);
  184 
  185 /* Broadcast */
  186 static int    lagg_bcast_start(struct lagg_softc *, struct mbuf *);
  187 static struct mbuf *lagg_bcast_input(struct lagg_softc *, struct lagg_port *,
  188                     struct mbuf *);
  189 
  190 /* 802.3ad LACP */
  191 static void     lagg_lacp_attach(struct lagg_softc *);
  192 static void     lagg_lacp_detach(struct lagg_softc *);
  193 static int      lagg_lacp_start(struct lagg_softc *, struct mbuf *);
  194 static struct mbuf *lagg_lacp_input(struct lagg_softc *, struct lagg_port *,
  195                     struct mbuf *);
  196 static void     lagg_lacp_lladdr(struct lagg_softc *);
  197 
  198 /* lagg protocol table */
  199 static const struct lagg_proto {
  200         lagg_proto      pr_num;
  201         void            (*pr_attach)(struct lagg_softc *);
  202         void            (*pr_detach)(struct lagg_softc *);
  203         int             (*pr_start)(struct lagg_softc *, struct mbuf *);
  204         struct mbuf *   (*pr_input)(struct lagg_softc *, struct lagg_port *,
  205                             struct mbuf *);
  206         int             (*pr_addport)(struct lagg_port *);
  207         void            (*pr_delport)(struct lagg_port *);
  208         void            (*pr_linkstate)(struct lagg_port *);
  209         void            (*pr_init)(struct lagg_softc *);
  210         void            (*pr_stop)(struct lagg_softc *);
  211         void            (*pr_lladdr)(struct lagg_softc *);
  212         void            (*pr_request)(struct lagg_softc *, void *);
  213         void            (*pr_portreq)(struct lagg_port *, void *);
  214 } lagg_protos[] = {
  215     {
  216         .pr_num = LAGG_PROTO_NONE
  217     },
  218     {
  219         .pr_num = LAGG_PROTO_ROUNDROBIN,
  220         .pr_attach = lagg_rr_attach,
  221         .pr_start = lagg_rr_start,
  222         .pr_input = lagg_rr_input,
  223     },
  224     {
  225         .pr_num = LAGG_PROTO_FAILOVER,
  226         .pr_start = lagg_fail_start,
  227         .pr_input = lagg_fail_input,
  228     },
  229     {
  230         .pr_num = LAGG_PROTO_LOADBALANCE,
  231         .pr_attach = lagg_lb_attach,
  232         .pr_detach = lagg_lb_detach,
  233         .pr_start = lagg_lb_start,
  234         .pr_input = lagg_lb_input,
  235         .pr_addport = lagg_lb_port_create,
  236         .pr_delport = lagg_lb_port_destroy,
  237     },
  238     {
  239         .pr_num = LAGG_PROTO_LACP,
  240         .pr_attach = lagg_lacp_attach,
  241         .pr_detach = lagg_lacp_detach,
  242         .pr_start = lagg_lacp_start,
  243         .pr_input = lagg_lacp_input,
  244         .pr_addport = lacp_port_create,
  245         .pr_delport = lacp_port_destroy,
  246         .pr_linkstate = lacp_linkstate,
  247         .pr_init = lacp_init,
  248         .pr_stop = lacp_stop,
  249         .pr_lladdr = lagg_lacp_lladdr,
  250         .pr_request = lacp_req,
  251         .pr_portreq = lacp_portreq,
  252     },
  253     {
  254         .pr_num = LAGG_PROTO_BROADCAST,
  255         .pr_start = lagg_bcast_start,
  256         .pr_input = lagg_bcast_input,
  257     },
  258 };
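
Dispatch through this table is what the lagg_proto_*() wrappers below implement: pr_start and pr_input are called without a NULL check, while every other member is optional and silently skipped when absent. Adding a balancing policy is therefore mostly a matter of adding an entry; a hypothetical one, assuming the constant and handler functions were defined elsewhere:

    {
        .pr_num = LAGG_PROTO_MYPROTO,       /* hypothetical constant */
        .pr_attach = lagg_myproto_attach,   /* optional setup */
        .pr_start = lagg_myproto_start,     /* transmit path */
        .pr_input = lagg_myproto_input,     /* receive path */
    },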
  259 
  260 SYSCTL_DECL(_net_link);
  261 SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0,
  262     "Link Aggregation");
  263 
  264 /* Allow input on any failover links */
  265 VNET_DEFINE_STATIC(int, lagg_failover_rx_all);
  266 #define V_lagg_failover_rx_all  VNET(lagg_failover_rx_all)
  267 SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW | CTLFLAG_VNET,
  268     &VNET_NAME(lagg_failover_rx_all), 0,
  269     "Accept input from any interface in a failover lagg");
  270 
  271 /* Default value for using flowid */
  272 VNET_DEFINE_STATIC(int, def_use_flowid) = 0;
  273 #define V_def_use_flowid        VNET(def_use_flowid)
  274 SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RWTUN,
  275     &VNET_NAME(def_use_flowid), 0,
  276     "Default setting for using flow id for load sharing");
  277 
  278 /* Default value for flowid shift */
  279 VNET_DEFINE_STATIC(int, def_flowid_shift) = 16;
  280 #define V_def_flowid_shift      VNET(def_flowid_shift)
  281 SYSCTL_INT(_net_link_lagg, OID_AUTO, default_flowid_shift, CTLFLAG_RWTUN,
  282     &VNET_NAME(def_flowid_shift), 0,
  283     "Default setting for flowid shift for load sharing");
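
Because these use CTLFLAG_RWTUN, each default is settable both as a loader tunable and at runtime, e.g. via `sysctl net.link.lagg.default_use_flowid=1` or `sysctl net.link.lagg.default_flowid_shift=16`; the OID paths follow from the _net_link_lagg node declared above, and the backing variables are virtualized per network stack (VNET_DEFINE_STATIC).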
  284 
  285 static void
  286 vnet_lagg_init(const void *unused __unused)
  287 {
  288 
  289         LAGG_LIST_LOCK_INIT();
  290         SLIST_INIT(&V_lagg_list);
  291         V_lagg_cloner = if_clone_simple(laggname, lagg_clone_create,
  292             lagg_clone_destroy, 0);
  293 }
  294 VNET_SYSINIT(vnet_lagg_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
  295     vnet_lagg_init, NULL);
  296 
  297 static void
  298 vnet_lagg_uninit(const void *unused __unused)
  299 {
  300 
  301         if_clone_detach(V_lagg_cloner);
  302         LAGG_LIST_LOCK_DESTROY();
  303 }
  304 VNET_SYSUNINIT(vnet_lagg_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY,
  305     vnet_lagg_uninit, NULL);
  306 
  307 static int
  308 lagg_modevent(module_t mod, int type, void *data)
  309 {
  310 
  311         switch (type) {
  312         case MOD_LOAD:
  313                 lagg_input_ethernet_p = lagg_input_ethernet;
  314                 lagg_input_infiniband_p = lagg_input_infiniband;
  315                 lagg_linkstate_p = lagg_port_state;
  316                 lagg_detach_cookie = EVENTHANDLER_REGISTER(
  317                     ifnet_departure_event, lagg_port_ifdetach, NULL,
  318                     EVENTHANDLER_PRI_ANY);
  319                 break;
  320         case MOD_UNLOAD:
  321                 EVENTHANDLER_DEREGISTER(ifnet_departure_event,
  322                     lagg_detach_cookie);
  323                 lagg_input_ethernet_p = NULL;
  324                 lagg_input_infiniband_p = NULL;
  325                 lagg_linkstate_p = NULL;
  326                 break;
  327         default:
  328                 return (EOPNOTSUPP);
  329         }
  330         return (0);
  331 }
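
Publishing function pointers on MOD_LOAD (and clearing them on MOD_UNLOAD) is how the generic input paths reach lagg without linking against it: callers test the hook before use. A minimal sketch of the consumer side of such a hook, showing the guard-then-call shape rather than the exact stack code:

    extern struct mbuf *(*lagg_input_ethernet_p)(struct ifnet *,
        struct mbuf *);

    if (ifp->if_lagg != NULL && lagg_input_ethernet_p != NULL) {
            m = (*lagg_input_ethernet_p)(ifp, m);
            if (m == NULL)
                    return;     /* the lagg consumed the mbuf */
    }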
  332 
  333 static moduledata_t lagg_mod = {
  334         "if_lagg",
  335         lagg_modevent,
  336         0
  337 };
  338 
  339 DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
  340 MODULE_VERSION(if_lagg, 1);
  341 MODULE_DEPEND(if_lagg, if_infiniband, 1, 1, 1);
  342 
  343 static void
  344 lagg_proto_attach(struct lagg_softc *sc, lagg_proto pr)
  345 {
  346 
  347         LAGG_XLOCK_ASSERT(sc);
  348         KASSERT(sc->sc_proto == LAGG_PROTO_NONE, ("%s: sc %p has proto",
  349             __func__, sc));
  350 
  351         if (sc->sc_ifflags & IFF_DEBUG)
  352                 if_printf(sc->sc_ifp, "using proto %u\n", pr);
  353 
  354         if (lagg_protos[pr].pr_attach != NULL)
  355                 lagg_protos[pr].pr_attach(sc);
  356         sc->sc_proto = pr;
  357 }
  358 
  359 static void
  360 lagg_proto_detach(struct lagg_softc *sc)
  361 {
  362         lagg_proto pr;
  363 
  364         LAGG_XLOCK_ASSERT(sc);
  365         pr = sc->sc_proto;
  366         sc->sc_proto = LAGG_PROTO_NONE;
  367 
  368         if (lagg_protos[pr].pr_detach != NULL)
  369                 lagg_protos[pr].pr_detach(sc);
  370 }
  371 
  372 static int
  373 lagg_proto_start(struct lagg_softc *sc, struct mbuf *m)
  374 {
  375 
  376         return (lagg_protos[sc->sc_proto].pr_start(sc, m));
  377 }
  378 
  379 static struct mbuf *
  380 lagg_proto_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
  381 {
  382 
  383         return (lagg_protos[sc->sc_proto].pr_input(sc, lp, m));
  384 }
  385 
  386 static int
  387 lagg_proto_addport(struct lagg_softc *sc, struct lagg_port *lp)
  388 {
  389 
  390         if (lagg_protos[sc->sc_proto].pr_addport == NULL)
  391                 return (0);
  392         else
  393                 return (lagg_protos[sc->sc_proto].pr_addport(lp));
  394 }
  395 
  396 static void
  397 lagg_proto_delport(struct lagg_softc *sc, struct lagg_port *lp)
  398 {
  399 
  400         if (lagg_protos[sc->sc_proto].pr_delport != NULL)
  401                 lagg_protos[sc->sc_proto].pr_delport(lp);
  402 }
  403 
  404 static void
  405 lagg_proto_linkstate(struct lagg_softc *sc, struct lagg_port *lp)
  406 {
  407 
  408         if (lagg_protos[sc->sc_proto].pr_linkstate != NULL)
  409                 lagg_protos[sc->sc_proto].pr_linkstate(lp);
  410 }
  411 
  412 static void
  413 lagg_proto_init(struct lagg_softc *sc)
  414 {
  415 
  416         if (lagg_protos[sc->sc_proto].pr_init != NULL)
  417                 lagg_protos[sc->sc_proto].pr_init(sc);
  418 }
  419 
  420 static void
  421 lagg_proto_stop(struct lagg_softc *sc)
  422 {
  423 
  424         if (lagg_protos[sc->sc_proto].pr_stop != NULL)
  425                 lagg_protos[sc->sc_proto].pr_stop(sc);
  426 }
  427 
  428 static void
  429 lagg_proto_lladdr(struct lagg_softc *sc)
  430 {
  431 
  432         if (lagg_protos[sc->sc_proto].pr_lladdr != NULL)
  433                 lagg_protos[sc->sc_proto].pr_lladdr(sc);
  434 }
  435 
  436 static void
  437 lagg_proto_request(struct lagg_softc *sc, void *v)
  438 {
  439 
  440         if (lagg_protos[sc->sc_proto].pr_request != NULL)
  441                 lagg_protos[sc->sc_proto].pr_request(sc, v);
  442 }
  443 
  444 static void
  445 lagg_proto_portreq(struct lagg_softc *sc, struct lagg_port *lp, void *v)
  446 {
  447 
  448         if (lagg_protos[sc->sc_proto].pr_portreq != NULL)
  449                 lagg_protos[sc->sc_proto].pr_portreq(lp, v);
  450 }
  451 
  452 /*
   453  * This routine is run via a vlan
   454  * config EVENT.
  455  */
  456 static void
  457 lagg_register_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
  458 {
  459         struct lagg_softc *sc = ifp->if_softc;
  460         struct lagg_port *lp;
  461 
   462         if (ifp->if_softc != arg)    /* Not our event */
  463                 return;
  464 
  465         LAGG_XLOCK(sc);
  466         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
  467                 EVENTHANDLER_INVOKE(vlan_config, lp->lp_ifp, vtag);
  468         LAGG_XUNLOCK(sc);
  469 }
  470 
  471 /*
   472  * This routine is run via a vlan
   473  * unconfig EVENT.
  474  */
  475 static void
  476 lagg_unregister_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
  477 {
  478         struct lagg_softc *sc = ifp->if_softc;
  479         struct lagg_port *lp;
  480 
   481         if (ifp->if_softc != arg)    /* Not our event */
  482                 return;
  483 
  484         LAGG_XLOCK(sc);
  485         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
  486                 EVENTHANDLER_INVOKE(vlan_unconfig, lp->lp_ifp, vtag);
  487         LAGG_XUNLOCK(sc);
  488 }
  489 
  490 static int
  491 lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
  492 {
  493         struct iflaggparam iflp;
  494         struct lagg_softc *sc;
  495         struct ifnet *ifp;
  496         int if_type;
  497         int error;
  498         static const uint8_t eaddr[LAGG_ADDR_LEN];
  499 
  500         if (params != NULL) {
  501                 error = copyin(params, &iflp, sizeof(iflp));
  502                 if (error)
  503                         return (error);
  504 
  505                 switch (iflp.lagg_type) {
  506                 case LAGG_TYPE_ETHERNET:
  507                         if_type = IFT_ETHER;
  508                         break;
  509                 case LAGG_TYPE_INFINIBAND:
  510                         if_type = IFT_INFINIBAND;
  511                         break;
  512                 default:
  513                         return (EINVAL);
  514                 }
  515         } else {
  516                 if_type = IFT_ETHER;
  517         }
  518 
  519         sc = malloc(sizeof(*sc), M_LAGG, M_WAITOK|M_ZERO);
  520         ifp = sc->sc_ifp = if_alloc(if_type);
  521         if (ifp == NULL) {
  522                 free(sc, M_LAGG);
  523                 return (ENOSPC);
  524         }
  525         LAGG_SX_INIT(sc);
  526 
  527         mtx_init(&sc->sc_mtx, "lagg-mtx", NULL, MTX_DEF);
  528         callout_init_mtx(&sc->sc_watchdog, &sc->sc_mtx, 0);
  529 
  530         LAGG_XLOCK(sc);
  531         if (V_def_use_flowid)
  532                 sc->sc_opts |= LAGG_OPT_USE_FLOWID;
  533         sc->flowid_shift = V_def_flowid_shift;
  534 
  535         /* Hash all layers by default */
  536         sc->sc_flags = MBUF_HASHFLAG_L2|MBUF_HASHFLAG_L3|MBUF_HASHFLAG_L4;
  537 
  538         lagg_proto_attach(sc, LAGG_PROTO_DEFAULT);
  539 
  540         CK_SLIST_INIT(&sc->sc_ports);
  541 
  542         switch (if_type) {
  543         case IFT_ETHER:
  544                 /* Initialise pseudo media types */
  545                 ifmedia_init(&sc->sc_media, 0, lagg_media_change,
  546                     lagg_media_status);
  547                 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
  548                 ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
  549 
  550                 if_initname(ifp, laggname, unit);
  551                 ifp->if_transmit = lagg_transmit_ethernet;
  552                 break;
  553         case IFT_INFINIBAND:
  554                 if_initname(ifp, laggname, unit);
  555                 ifp->if_transmit = lagg_transmit_infiniband;
  556                 break;
  557         default:
  558                 break;
  559         }
  560         ifp->if_softc = sc;
  561         ifp->if_qflush = lagg_qflush;
  562         ifp->if_init = lagg_init;
  563         ifp->if_ioctl = lagg_ioctl;
  564         ifp->if_get_counter = lagg_get_counter;
  565         ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
  566 #ifdef RATELIMIT
  567         ifp->if_snd_tag_alloc = lagg_snd_tag_alloc;
  568 #endif
  569         ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS;
  570 
  571         /*
  572          * Attach as an ordinary ethernet device, children will be attached
  573          * as special device IFT_IEEE8023ADLAG or IFT_INFINIBANDLAG.
  574          */
  575         switch (if_type) {
  576         case IFT_ETHER:
  577                 ether_ifattach(ifp, eaddr);
  578                 break;
  579         case IFT_INFINIBAND:
  580                 infiniband_ifattach(ifp, eaddr, sc->sc_bcast_addr);
  581                 break;
  582         default:
  583                 break;
  584         }
  585 
  586         sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
  587                 lagg_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
  588         sc->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
  589                 lagg_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
  590 
  591         /* Insert into the global list of laggs */
  592         LAGG_LIST_LOCK();
  593         SLIST_INSERT_HEAD(&V_lagg_list, sc, sc_entries);
  594         LAGG_LIST_UNLOCK();
  595         LAGG_XUNLOCK(sc);
  596 
  597         return (0);
  598 }
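
This is the path behind `ifconfig laggN create`: the cloner registered in vnet_lagg_init() calls here, an Ethernet-type lagg is built unless the optional struct iflaggparam passed through params selects LAGG_TYPE_INFINIBAND, and ports are then attached separately (for example `ifconfig lagg0 laggproto lacp laggport em0 laggport em1`, the member names being illustrative).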
  599 
  600 static void
  601 lagg_clone_destroy(struct ifnet *ifp)
  602 {
  603         struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
  604         struct lagg_port *lp;
  605 
  606         LAGG_XLOCK(sc);
  607         sc->sc_destroying = 1;
  608         lagg_stop(sc);
  609         ifp->if_flags &= ~IFF_UP;
  610 
  611         EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach);
  612         EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach);
  613 
  614         /* Shutdown and remove lagg ports */
  615         while ((lp = CK_SLIST_FIRST(&sc->sc_ports)) != NULL)
  616                 lagg_port_destroy(lp, 1);
  617 
  618         /* Unhook the aggregation protocol */
  619         lagg_proto_detach(sc);
  620         LAGG_XUNLOCK(sc);
  621 
  622         switch (ifp->if_type) {
  623         case IFT_ETHER:
  624                 ifmedia_removeall(&sc->sc_media);
  625                 ether_ifdetach(ifp);
  626                 break;
  627         case IFT_INFINIBAND:
  628                 infiniband_ifdetach(ifp);
  629                 break;
  630         default:
  631                 break;
  632         }
  633         if_free(ifp);
  634 
  635         LAGG_LIST_LOCK();
  636         SLIST_REMOVE(&V_lagg_list, sc, lagg_softc, sc_entries);
  637         LAGG_LIST_UNLOCK();
  638 
  639         mtx_destroy(&sc->sc_mtx);
  640         LAGG_SX_DESTROY(sc);
  641         free(sc, M_LAGG);
  642 }
  643 
  644 static void
  645 lagg_capabilities(struct lagg_softc *sc)
  646 {
  647         struct lagg_port *lp;
  648         int cap, ena, pena;
  649         uint64_t hwa;
  650         struct ifnet_hw_tsomax hw_tsomax;
  651 
  652         LAGG_XLOCK_ASSERT(sc);
  653 
  654         /* Get common enabled capabilities for the lagg ports */
  655         ena = ~0;
  656         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
  657                 ena &= lp->lp_ifp->if_capenable;
  658         ena = (ena == ~0 ? 0 : ena);
  659 
  660         /*
  661          * Apply common enabled capabilities back to the lagg ports.
  662          * May require several iterations if they are dependent.
  663          */
  664         do {
  665                 pena = ena;
  666                 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
  667                         lagg_setcaps(lp, ena);
  668                         ena &= lp->lp_ifp->if_capenable;
  669                 }
  670         } while (pena != ena);
  671 
  672         /* Get other capabilities from the lagg ports */
  673         cap = ~0;
  674         hwa = ~(uint64_t)0;
  675         memset(&hw_tsomax, 0, sizeof(hw_tsomax));
  676         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
  677                 cap &= lp->lp_ifp->if_capabilities;
  678                 hwa &= lp->lp_ifp->if_hwassist;
  679                 if_hw_tsomax_common(lp->lp_ifp, &hw_tsomax);
  680         }
  681         cap = (cap == ~0 ? 0 : cap);
  682         hwa = (hwa == ~(uint64_t)0 ? 0 : hwa);
  683 
  684         if (sc->sc_ifp->if_capabilities != cap ||
  685             sc->sc_ifp->if_capenable != ena ||
  686             sc->sc_ifp->if_hwassist != hwa ||
  687             if_hw_tsomax_update(sc->sc_ifp, &hw_tsomax) != 0) {
  688                 sc->sc_ifp->if_capabilities = cap;
  689                 sc->sc_ifp->if_capenable = ena;
  690                 sc->sc_ifp->if_hwassist = hwa;
  691                 getmicrotime(&sc->sc_ifp->if_lastchange);
  692 
  693                 if (sc->sc_ifflags & IFF_DEBUG)
  694                         if_printf(sc->sc_ifp,
  695                             "capabilities 0x%08x enabled 0x%08x\n", cap, ena);
  696         }
  697 }
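
The fixed-point loop above matters because capabilities can be interdependent inside a driver: for example, disabling transmit checksum offload on a port typically forces that port's TSO off as well, which shrinks the recomputed intersection and requires another pass. Iteration stops as soon as a full sweep leaves the common set unchanged.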
  698 
  699 static int
  700 lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
  701 {
  702         struct lagg_softc *sc_ptr;
  703         struct lagg_port *lp, *tlp;
  704         struct ifreq ifr;
  705         int error, i, oldmtu;
  706         int if_type;
  707         uint64_t *pval;
  708 
  709         LAGG_XLOCK_ASSERT(sc);
  710 
  711         if (sc->sc_ifp == ifp) {
  712                 if_printf(sc->sc_ifp,
  713                     "cannot add a lagg to itself as a port\n");
  714                 return (EINVAL);
  715         }
  716 
  717         /* Limit the maximal number of lagg ports */
  718         if (sc->sc_count >= LAGG_MAX_PORTS)
  719                 return (ENOSPC);
  720 
  721         /* Check if port has already been associated to a lagg */
  722         if (ifp->if_lagg != NULL) {
  723                 /* Port is already in the current lagg? */
  724                 lp = (struct lagg_port *)ifp->if_lagg;
  725                 if (lp->lp_softc == sc)
  726                         return (EEXIST);
  727                 return (EBUSY);
  728         }
  729 
  730         switch (sc->sc_ifp->if_type) {
  731         case IFT_ETHER:
  732                 /* XXX Disallow non-ethernet interfaces (this should be any of 802) */
  733                 if (ifp->if_type != IFT_ETHER && ifp->if_type != IFT_L2VLAN)
  734                         return (EPROTONOSUPPORT);
  735                 if_type = IFT_IEEE8023ADLAG;
  736                 break;
  737         case IFT_INFINIBAND:
  738                 /* XXX Disallow non-infiniband interfaces */
  739                 if (ifp->if_type != IFT_INFINIBAND)
  740                         return (EPROTONOSUPPORT);
  741                 if_type = IFT_INFINIBANDLAG;
  742                 break;
  743         default:
  744                 break;
  745         }
  746 
  747         /* Allow the first Ethernet member to define the MTU */
  748         oldmtu = -1;
  749         if (CK_SLIST_EMPTY(&sc->sc_ports)) {
  750                 sc->sc_ifp->if_mtu = ifp->if_mtu;
  751         } else if (sc->sc_ifp->if_mtu != ifp->if_mtu) {
  752                 if (ifp->if_ioctl == NULL) {
  753                         if_printf(sc->sc_ifp, "cannot change MTU for %s\n",
  754                             ifp->if_xname);
  755                         return (EINVAL);
  756                 }
  757                 oldmtu = ifp->if_mtu;
  758                 strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name));
  759                 ifr.ifr_mtu = sc->sc_ifp->if_mtu;
  760                 error = (*ifp->if_ioctl)(ifp, SIOCSIFMTU, (caddr_t)&ifr);
  761                 if (error != 0) {
  762                         if_printf(sc->sc_ifp, "invalid MTU for %s\n",
  763                             ifp->if_xname);
  764                         return (error);
  765                 }
  766                 ifr.ifr_mtu = oldmtu;
  767         }
  768 
  769         lp = malloc(sizeof(struct lagg_port), M_LAGG, M_WAITOK|M_ZERO);
  770         lp->lp_softc = sc;
  771 
  772         /* Check if port is a stacked lagg */
  773         LAGG_LIST_LOCK();
  774         SLIST_FOREACH(sc_ptr, &V_lagg_list, sc_entries) {
  775                 if (ifp == sc_ptr->sc_ifp) {
  776                         LAGG_LIST_UNLOCK();
  777                         free(lp, M_LAGG);
  778                         if (oldmtu != -1)
  779                                 (*ifp->if_ioctl)(ifp, SIOCSIFMTU,
  780                                     (caddr_t)&ifr);
  781                         return (EINVAL);
   782                         /* XXX disable stacking for the moment, it's untested */
  783 #ifdef LAGG_PORT_STACKING
  784                         lp->lp_flags |= LAGG_PORT_STACK;
  785                         if (lagg_port_checkstacking(sc_ptr) >=
  786                             LAGG_MAX_STACKING) {
  787                                 LAGG_LIST_UNLOCK();
  788                                 free(lp, M_LAGG);
  789                                 if (oldmtu != -1)
  790                                         (*ifp->if_ioctl)(ifp, SIOCSIFMTU,
  791                                             (caddr_t)&ifr);
  792                                 return (E2BIG);
  793                         }
  794 #endif
  795                 }
  796         }
  797         LAGG_LIST_UNLOCK();
  798 
  799         if_ref(ifp);
  800         lp->lp_ifp = ifp;
  801 
  802         bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ifp->if_addrlen);
  803         lp->lp_ifcapenable = ifp->if_capenable;
  804         if (CK_SLIST_EMPTY(&sc->sc_ports)) {
  805                 bcopy(IF_LLADDR(ifp), IF_LLADDR(sc->sc_ifp), ifp->if_addrlen);
  806                 lagg_proto_lladdr(sc);
  807                 EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
  808         } else {
  809                 if_setlladdr(ifp, IF_LLADDR(sc->sc_ifp), ifp->if_addrlen);
  810         }
  811         lagg_setflags(lp, 1);
  812 
  813         if (CK_SLIST_EMPTY(&sc->sc_ports))
  814                 sc->sc_primary = lp;
  815 
  816         /* Change the interface type */
  817         lp->lp_iftype = ifp->if_type;
  818         ifp->if_type = if_type;
  819         ifp->if_lagg = lp;
  820         lp->lp_ioctl = ifp->if_ioctl;
  821         ifp->if_ioctl = lagg_port_ioctl;
  822         lp->lp_output = ifp->if_output;
  823         ifp->if_output = lagg_port_output;
  824 
  825         /* Read port counters */
  826         pval = lp->port_counters.val;
  827         for (i = 0; i < IFCOUNTERS; i++, pval++)
  828                 *pval = ifp->if_get_counter(ifp, i);
  829 
  830         /*
  831          * Insert into the list of ports.
   832          * Keep ports sorted by if_index: that way configuration is
   833          * predictable and the same `ifconfig laggN create ...` command
   834          * leads to the same result each time.
  835          */
  836         CK_SLIST_FOREACH(tlp, &sc->sc_ports, lp_entries) {
  837                 if (tlp->lp_ifp->if_index < ifp->if_index && (
  838                     CK_SLIST_NEXT(tlp, lp_entries) == NULL ||
  839                     ((struct  lagg_port*)CK_SLIST_NEXT(tlp, lp_entries))->lp_ifp->if_index >
  840                     ifp->if_index))
  841                         break;
  842         }
  843         if (tlp != NULL)
  844                 CK_SLIST_INSERT_AFTER(tlp, lp, lp_entries);
  845         else
  846                 CK_SLIST_INSERT_HEAD(&sc->sc_ports, lp, lp_entries);
  847         sc->sc_count++;
  848 
  849         lagg_setmulti(lp);
  850 
  851 
  852         if ((error = lagg_proto_addport(sc, lp)) != 0) {
  853                 /* Remove the port, without calling pr_delport. */
  854                 lagg_port_destroy(lp, 0);
  855                 if (oldmtu != -1)
  856                         (*ifp->if_ioctl)(ifp, SIOCSIFMTU, (caddr_t)&ifr);
  857                 return (error);
  858         }
  859 
  860         /* Update lagg capabilities */
  861         lagg_capabilities(sc);
  862         lagg_linkstate(sc);
  863 
  864         return (0);
  865 }
  866 
  867 #ifdef LAGG_PORT_STACKING
  868 static int
  869 lagg_port_checkstacking(struct lagg_softc *sc)
  870 {
  871         struct lagg_softc *sc_ptr;
  872         struct lagg_port *lp;
  873         int m = 0;
  874 
  875         LAGG_SXLOCK_ASSERT(sc);
  876         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
  877                 if (lp->lp_flags & LAGG_PORT_STACK) {
  878                         sc_ptr = (struct lagg_softc *)lp->lp_ifp->if_softc;
  879                         m = MAX(m, lagg_port_checkstacking(sc_ptr));
  880                 }
  881         }
  882 
  883         return (m + 1);
  884 }
  885 #endif
  886 
  887 static void
  888 lagg_port_destroy_cb(epoch_context_t ec)
  889 {
  890         struct lagg_port *lp;
  891         struct ifnet *ifp;
  892 
  893         lp = __containerof(ec, struct lagg_port, lp_epoch_ctx);
  894         ifp = lp->lp_ifp;
  895 
  896         if_rele(ifp);
  897         free(lp, M_LAGG);
  898 }
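
The callback receives only the epoch_context embedded in the lagg_port, so __containerof() recovers the enclosing structure. By the time the network epoch invokes it, every reader that entered the epoch before the epoch_call() in lagg_port_destroy() has exited, so no traversal of sc_ports can still hold the pointer being freed.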
  899 
  900 static int
  901 lagg_port_destroy(struct lagg_port *lp, int rundelport)
  902 {
  903         struct lagg_softc *sc = lp->lp_softc;
  904         struct lagg_port *lp_ptr, *lp0;
  905         struct ifnet *ifp = lp->lp_ifp;
  906         uint64_t *pval, vdiff;
  907         int i;
  908 
  909         LAGG_XLOCK_ASSERT(sc);
  910 
  911         if (rundelport)
  912                 lagg_proto_delport(sc, lp);
  913 
  914         if (lp->lp_detaching == 0)
  915                 lagg_clrmulti(lp);
  916 
  917         /* Restore interface */
  918         ifp->if_type = lp->lp_iftype;
  919         ifp->if_ioctl = lp->lp_ioctl;
  920         ifp->if_output = lp->lp_output;
  921         ifp->if_lagg = NULL;
  922 
  923         /* Update detached port counters */
  924         pval = lp->port_counters.val;
  925         for (i = 0; i < IFCOUNTERS; i++, pval++) {
  926                 vdiff = ifp->if_get_counter(ifp, i) - *pval;
  927                 sc->detached_counters.val[i] += vdiff;
  928         }
  929 
  930         /* Finally, remove the port from the lagg */
  931         CK_SLIST_REMOVE(&sc->sc_ports, lp, lagg_port, lp_entries);
  932         sc->sc_count--;
  933 
  934         /* Update the primary interface */
  935         if (lp == sc->sc_primary) {
  936                 uint8_t lladdr[LAGG_ADDR_LEN];
  937 
  938                 if ((lp0 = CK_SLIST_FIRST(&sc->sc_ports)) == NULL)
  939                         bzero(&lladdr, LAGG_ADDR_LEN);
  940                 else
  941                         bcopy(lp0->lp_lladdr, lladdr, LAGG_ADDR_LEN);
  942                 sc->sc_primary = lp0;
  943                 if (sc->sc_destroying == 0) {
  944                         bcopy(lladdr, IF_LLADDR(sc->sc_ifp), sc->sc_ifp->if_addrlen);
  945                         lagg_proto_lladdr(sc);
  946                         EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
  947 
  948                         /*
  949                          * Update lladdr for each port (new primary needs update
  950                          * as well, to switch from old lladdr to its 'real' one).
  951                          * We can skip this if the lagg is being destroyed.
  952                          */
  953                         CK_SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries)
  954                                 if_setlladdr(lp_ptr->lp_ifp, lladdr,
  955                                     lp_ptr->lp_ifp->if_addrlen);
  956                 }
  957         }
  958 
  959         if (lp->lp_ifflags)
  960                 if_printf(ifp, "%s: lp_ifflags unclean\n", __func__);
  961 
  962         if (lp->lp_detaching == 0) {
  963                 lagg_setflags(lp, 0);
  964                 lagg_setcaps(lp, lp->lp_ifcapenable);
  965                 if_setlladdr(ifp, lp->lp_lladdr, ifp->if_addrlen);
  966         }
  967 
  968         /*
   969          * Free the port and release its ifnet reference after a grace
   970          * period has elapsed.
  971          */
  972         epoch_call(net_epoch_preempt, &lp->lp_epoch_ctx, lagg_port_destroy_cb);
  973         /* Update lagg capabilities */
  974         lagg_capabilities(sc);
  975         lagg_linkstate(sc);
  976 
  977         return (0);
  978 }
  979 
  980 static int
  981 lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
  982 {
  983         struct lagg_reqport *rp = (struct lagg_reqport *)data;
  984         struct lagg_softc *sc;
  985         struct lagg_port *lp = NULL;
  986         int error = 0;
  987 
  988         /* Should be checked by the caller */
  989         switch (ifp->if_type) {
  990         case IFT_IEEE8023ADLAG:
  991         case IFT_INFINIBANDLAG:
  992                 if ((lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL)
  993                         goto fallback;
  994                 break;
  995         default:
  996                 goto fallback;
  997         }
  998 
  999         switch (cmd) {
 1000         case SIOCGLAGGPORT:
 1001                 if (rp->rp_portname[0] == '\0' ||
 1002                     ifunit(rp->rp_portname) != ifp) {
 1003                         error = EINVAL;
 1004                         break;
 1005                 }
 1006 
 1007                 LAGG_RLOCK();
 1008                 if ((lp = ifp->if_lagg) == NULL || lp->lp_softc != sc) {
 1009                         error = ENOENT;
 1010                         LAGG_RUNLOCK();
 1011                         break;
 1012                 }
 1013 
 1014                 lagg_port2req(lp, rp);
 1015                 LAGG_RUNLOCK();
 1016                 break;
 1017 
 1018         case SIOCSIFCAP:
 1019                 if (lp->lp_ioctl == NULL) {
 1020                         error = EINVAL;
 1021                         break;
 1022                 }
 1023                 error = (*lp->lp_ioctl)(ifp, cmd, data);
 1024                 if (error)
 1025                         break;
 1026 
 1027                 /* Update lagg interface capabilities */
 1028                 LAGG_XLOCK(sc);
 1029                 lagg_capabilities(sc);
 1030                 LAGG_XUNLOCK(sc);
 1031                 VLAN_CAPABILITIES(sc->sc_ifp);
 1032                 break;
 1033 
 1034         case SIOCSIFMTU:
 1035                 /* Do not allow the MTU to be changed once joined */
 1036                 error = EINVAL;
 1037                 break;
 1038 
 1039         default:
 1040                 goto fallback;
 1041         }
 1042 
 1043         return (error);
 1044 
 1045 fallback:
 1046         if (lp != NULL && lp->lp_ioctl != NULL)
 1047                 return ((*lp->lp_ioctl)(ifp, cmd, data));
 1048 
 1049         return (EINVAL);
 1050 }
 1051 
 1052 /*
  1053  * Requests counter @cnt data.
  1054  *
  1055  * The counter value is calculated the following way:
  1056  * 1) for each port, sum the difference between current and "initial" measurements.
  1057  * 2) add the lagg logical interface counters.
  1058  * 3) add data from the detached_counters array.
  1059  *
  1060  * We also do the following things on port attach/detach:
  1061  * 1) On port attach we store all of its counters in the port_counters array.
  1062  * 2) On port detach we add the difference between the "initial" and
  1063  *    current counter data to the detached_counters array.
 1064  */
 1065 static uint64_t
 1066 lagg_get_counter(struct ifnet *ifp, ift_counter cnt)
 1067 {
 1068         struct lagg_softc *sc;
 1069         struct lagg_port *lp;
 1070         struct ifnet *lpifp;
 1071         uint64_t newval, oldval, vsum;
 1072 
 1073         /* Revise this when we've got non-generic counters. */
 1074         KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
 1075 
 1076         sc = (struct lagg_softc *)ifp->if_softc;
 1077 
 1078         vsum = 0;
 1079         LAGG_RLOCK();
 1080         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1081                 /* Saved attached value */
 1082                 oldval = lp->port_counters.val[cnt];
 1083                 /* current value */
 1084                 lpifp = lp->lp_ifp;
 1085                 newval = lpifp->if_get_counter(lpifp, cnt);
 1086                 /* Calculate diff and save new */
 1087                 vsum += newval - oldval;
 1088         }
 1089         LAGG_RUNLOCK();
 1090 
 1091         /*
 1092          * Add counter data which might be added by upper
 1093          * layer protocols operating on logical interface.
 1094          */
 1095         vsum += if_get_counter_default(ifp, cnt);
 1096 
 1097         /*
 1098          * Add counter data from detached ports counters
 1099          */
 1100         vsum += sc->detached_counters.val[cnt];
 1101 
 1102 
 1103         return (vsum);
 1104 }
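
A worked example with hypothetical figures: if a port attached when its input-packet counter read 1,000 and the driver now reports 4,000, the loop contributes 3,000; add 500 packets accounted directly on the logical interface and 200 carried in detached_counters from a previously removed port, and lagg_get_counter() returns 3,700.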
 1105 
 1106 /*
 1107  * For direct output to child ports.
 1108  */
 1109 static int
 1110 lagg_port_output(struct ifnet *ifp, struct mbuf *m,
 1111         const struct sockaddr *dst, struct route *ro)
 1112 {
 1113         struct lagg_port *lp = ifp->if_lagg;
 1114 
 1115         switch (dst->sa_family) {
 1116                 case pseudo_AF_HDRCMPLT:
 1117                 case AF_UNSPEC:
 1118                         if (lp != NULL)
 1119                                 return ((*lp->lp_output)(ifp, m, dst, ro));
 1120         }
 1121 
 1122         /* drop any other frames */
 1123         m_freem(m);
 1124         return (ENETDOWN);
 1125 }
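
Only address family AF_UNSPEC and pseudo_AF_HDRCMPLT are passed through to the child's saved output routine; these are the families used for raw frame writes (BPF injection being the classic case), so ordinary protocol traffic cannot bypass the lagg by writing to a member port.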
 1126 
 1127 static void
 1128 lagg_port_ifdetach(void *arg __unused, struct ifnet *ifp)
 1129 {
 1130         struct lagg_port *lp;
 1131         struct lagg_softc *sc;
 1132 
 1133         if ((lp = ifp->if_lagg) == NULL)
 1134                 return;
 1135         /* If the ifnet is just being renamed, don't do anything. */
 1136         if (ifp->if_flags & IFF_RENAMING)
 1137                 return;
 1138 
 1139         sc = lp->lp_softc;
 1140 
 1141         LAGG_XLOCK(sc);
 1142         lp->lp_detaching = 1;
 1143         lagg_port_destroy(lp, 1);
 1144         LAGG_XUNLOCK(sc);
 1145         VLAN_CAPABILITIES(sc->sc_ifp);
 1146 }
 1147 
 1148 static void
 1149 lagg_port2req(struct lagg_port *lp, struct lagg_reqport *rp)
 1150 {
 1151         struct lagg_softc *sc = lp->lp_softc;
 1152 
 1153         strlcpy(rp->rp_ifname, sc->sc_ifname, sizeof(rp->rp_ifname));
 1154         strlcpy(rp->rp_portname, lp->lp_ifp->if_xname, sizeof(rp->rp_portname));
 1155         rp->rp_prio = lp->lp_prio;
 1156         rp->rp_flags = lp->lp_flags;
 1157         lagg_proto_portreq(sc, lp, &rp->rp_psc);
 1158 
 1159         /* Add protocol specific flags */
 1160         switch (sc->sc_proto) {
 1161                 case LAGG_PROTO_FAILOVER:
 1162                         if (lp == sc->sc_primary)
 1163                                 rp->rp_flags |= LAGG_PORT_MASTER;
 1164                         if (lp == lagg_link_active(sc, sc->sc_primary))
 1165                                 rp->rp_flags |= LAGG_PORT_ACTIVE;
 1166                         break;
 1167 
 1168                 case LAGG_PROTO_ROUNDROBIN:
 1169                 case LAGG_PROTO_LOADBALANCE:
 1170                 case LAGG_PROTO_BROADCAST:
 1171                         if (LAGG_PORTACTIVE(lp))
 1172                                 rp->rp_flags |= LAGG_PORT_ACTIVE;
 1173                         break;
 1174 
 1175                 case LAGG_PROTO_LACP:
 1176                         /* LACP has a different definition of active */
 1177                         if (lacp_isactive(lp))
 1178                                 rp->rp_flags |= LAGG_PORT_ACTIVE;
 1179                         if (lacp_iscollecting(lp))
 1180                                 rp->rp_flags |= LAGG_PORT_COLLECTING;
 1181                         if (lacp_isdistributing(lp))
 1182                                 rp->rp_flags |= LAGG_PORT_DISTRIBUTING;
 1183                         break;
 1184         }
 1185 
 1186 }
 1187 
 1188 static void
 1189 lagg_watchdog_infiniband(void *arg)
 1190 {
 1191         struct lagg_softc *sc;
 1192         struct lagg_port *lp;
 1193         struct ifnet *ifp;
 1194         struct ifnet *lp_ifp;
 1195 
 1196         sc = arg;
 1197 
 1198         /*
 1199          * Because infiniband nodes have a fixed MAC address, which is
 1200          * generated by the so-called GID, we need to regularly update
 1201          * the link level address of the parent lagg<N> device when
 1202          * the active port changes. Possibly we could piggy-back on
  1203          * link up/down events as well, but using a timer also provides
 1204          * a guarantee against too frequent events. This operation
 1205          * does not have to be atomic.
 1206          */
 1207         LAGG_RLOCK();
 1208         lp = lagg_link_active(sc, sc->sc_primary);
 1209         if (lp != NULL) {
 1210                 ifp = sc->sc_ifp;
 1211                 lp_ifp = lp->lp_ifp;
 1212 
 1213                 if (ifp != NULL && lp_ifp != NULL &&
 1214                     (memcmp(IF_LLADDR(ifp), IF_LLADDR(lp_ifp), ifp->if_addrlen) != 0 ||
 1215                      memcmp(sc->sc_bcast_addr, lp_ifp->if_broadcastaddr, ifp->if_addrlen) != 0)) {
 1216                         memcpy(IF_LLADDR(ifp), IF_LLADDR(lp_ifp), ifp->if_addrlen);
 1217                         memcpy(sc->sc_bcast_addr, lp_ifp->if_broadcastaddr, ifp->if_addrlen);
 1218 
 1219                         CURVNET_SET(ifp->if_vnet);
 1220                         EVENTHANDLER_INVOKE(iflladdr_event, ifp);
 1221                         CURVNET_RESTORE();
 1222                 }
 1223         }
 1224         LAGG_RUNLOCK();
 1225 
 1226         callout_reset(&sc->sc_watchdog, hz, &lagg_watchdog_infiniband, arg);
 1227 }
 1228 
 1229 static void
 1230 lagg_init(void *xsc)
 1231 {
 1232         struct lagg_softc *sc = (struct lagg_softc *)xsc;
 1233         struct ifnet *ifp = sc->sc_ifp;
 1234         struct lagg_port *lp;
 1235 
 1236         LAGG_XLOCK(sc);
 1237         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 1238                 LAGG_XUNLOCK(sc);
 1239                 return;
 1240         }
 1241 
 1242         ifp->if_drv_flags |= IFF_DRV_RUNNING;
 1243 
 1244         /*
 1245          * Update the port lladdrs if needed.
  1246          * This might be an if_setlladdr() notification
  1247          * that the lladdr has been changed.
 1248          */
 1249         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1250                 if (memcmp(IF_LLADDR(ifp), IF_LLADDR(lp->lp_ifp),
 1251                     ifp->if_addrlen) != 0)
 1252                         if_setlladdr(lp->lp_ifp, IF_LLADDR(ifp), ifp->if_addrlen);
 1253         }
 1254 
 1255         lagg_proto_init(sc);
 1256 
 1257         if (ifp->if_type == IFT_INFINIBAND) {
 1258                 mtx_lock(&sc->sc_mtx);
 1259                 lagg_watchdog_infiniband(sc);
 1260                 mtx_unlock(&sc->sc_mtx);
 1261         }
 1262 
 1263         LAGG_XUNLOCK(sc);
 1264 }
 1265 
 1266 static void
 1267 lagg_stop(struct lagg_softc *sc)
 1268 {
 1269         struct ifnet *ifp = sc->sc_ifp;
 1270 
 1271         LAGG_XLOCK_ASSERT(sc);
 1272 
 1273         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 1274                 return;
 1275 
 1276         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 1277 
 1278         lagg_proto_stop(sc);
 1279 
 1280         mtx_lock(&sc->sc_mtx);
 1281         callout_stop(&sc->sc_watchdog);
 1282         mtx_unlock(&sc->sc_mtx);
 1283 
 1284         callout_drain(&sc->sc_watchdog);
 1285 }
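
lagg_init() and lagg_stop() together show the canonical life cycle of a self-rearming callout: callout_init_mtx() in lagg_clone_create() binds the watchdog to sc_mtx, the handler re-schedules itself via callout_reset(), and teardown cancels under the mutex before draining any invocation already in flight. Condensed, with the handler body elided (lagg_init() actually invokes the handler directly the first time, which is equivalent since the handler rearms itself):

    /* start: first tick fires the handler, which rearms itself */
    mtx_lock(&sc->sc_mtx);
    callout_reset(&sc->sc_watchdog, hz, lagg_watchdog_infiniband, sc);
    mtx_unlock(&sc->sc_mtx);

    /* stop: cancel a pending tick, then wait out a running one */
    mtx_lock(&sc->sc_mtx);
    callout_stop(&sc->sc_watchdog);
    mtx_unlock(&sc->sc_mtx);
    callout_drain(&sc->sc_watchdog);    /* may sleep; mutex dropped */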
 1286 
 1287 static int
 1288 lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 1289 {
 1290         struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 1291         struct lagg_reqall *ra = (struct lagg_reqall *)data;
 1292         struct lagg_reqopts *ro = (struct lagg_reqopts *)data;
 1293         struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf;
 1294         struct lagg_reqflags *rf = (struct lagg_reqflags *)data;
 1295         struct ifreq *ifr = (struct ifreq *)data;
 1296         struct lagg_port *lp;
 1297         struct ifnet *tpif;
 1298         struct thread *td = curthread;
 1299         char *buf, *outbuf;
 1300         int count, buflen, len, error = 0, oldmtu;
 1301 
 1302         bzero(&rpbuf, sizeof(rpbuf));
 1303 
 1304         switch (cmd) {
 1305         case SIOCGLAGG:
 1306                 LAGG_XLOCK(sc);
 1307                 buflen = sc->sc_count * sizeof(struct lagg_reqport);
 1308                 outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
 1309                 ra->ra_proto = sc->sc_proto;
 1310                 lagg_proto_request(sc, &ra->ra_psc);
 1311                 count = 0;
 1312                 buf = outbuf;
 1313                 len = min(ra->ra_size, buflen);
 1314                 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1315                         if (len < sizeof(rpbuf))
 1316                                 break;
 1317 
 1318                         lagg_port2req(lp, &rpbuf);
 1319                         memcpy(buf, &rpbuf, sizeof(rpbuf));
 1320                         count++;
 1321                         buf += sizeof(rpbuf);
 1322                         len -= sizeof(rpbuf);
 1323                 }
 1324                 LAGG_XUNLOCK(sc);
 1325                 ra->ra_ports = count;
 1326                 ra->ra_size = count * sizeof(rpbuf);
 1327                 error = copyout(outbuf, ra->ra_port, ra->ra_size);
 1328                 free(outbuf, M_TEMP);
 1329                 break;
 1330         case SIOCSLAGG:
 1331                 error = priv_check(td, PRIV_NET_LAGG);
 1332                 if (error)
 1333                         break;
 1334                 if (ra->ra_proto >= LAGG_PROTO_MAX) {
 1335                         error = EPROTONOSUPPORT;
 1336                         break;
 1337                 }
 1338                 /* Infiniband only supports the failover protocol. */
 1339                 if (ra->ra_proto != LAGG_PROTO_FAILOVER &&
 1340                     ifp->if_type == IFT_INFINIBAND) {
 1341                         error = EPROTONOSUPPORT;
 1342                         break;
 1343                 }
 1344                 LAGG_XLOCK(sc);
 1345                 lagg_proto_detach(sc);
 1346                 LAGG_UNLOCK_ASSERT();
 1347                 lagg_proto_attach(sc, ra->ra_proto);
 1348                 LAGG_XUNLOCK(sc);
 1349                 break;
 1350         case SIOCGLAGGOPTS:
 1351                 LAGG_XLOCK(sc);
 1352                 ro->ro_opts = sc->sc_opts;
 1353                 if (sc->sc_proto == LAGG_PROTO_LACP) {
 1354                         struct lacp_softc *lsc;
 1355 
 1356                         lsc = (struct lacp_softc *)sc->sc_psc;
 1357                         if (lsc->lsc_debug.lsc_tx_test != 0)
 1358                                 ro->ro_opts |= LAGG_OPT_LACP_TXTEST;
 1359                         if (lsc->lsc_debug.lsc_rx_test != 0)
 1360                                 ro->ro_opts |= LAGG_OPT_LACP_RXTEST;
 1361                         if (lsc->lsc_strict_mode != 0)
 1362                                 ro->ro_opts |= LAGG_OPT_LACP_STRICT;
 1363                         if (lsc->lsc_fast_timeout != 0)
 1364                                 ro->ro_opts |= LAGG_OPT_LACP_FAST_TIMO;
 1365 
 1366                         ro->ro_active = sc->sc_active;
 1367                 } else {
 1368                         ro->ro_active = 0;
 1369                         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 1370                                 ro->ro_active += LAGG_PORTACTIVE(lp);
 1371                 }
 1372                 ro->ro_bkt = sc->sc_stride;
 1373                 ro->ro_flapping = sc->sc_flapping;
 1374                 ro->ro_flowid_shift = sc->flowid_shift;
 1375                 LAGG_XUNLOCK(sc);
 1376                 break;
 1377         case SIOCSLAGGOPTS:
 1378                 error = priv_check(td, PRIV_NET_LAGG);
 1379                 if (error)
 1380                         break;
 1381 
 1382                 /*
 1383                  * The stride option was added without defining a corresponding
 1384                  * LAGG_OPT flag, so handle a non-zero value before checking
 1385                  * anything else to preserve compatibility.
 1386                  */
 1387                 LAGG_XLOCK(sc);
 1388                 if (ro->ro_opts == 0 && ro->ro_bkt != 0) {
 1389                         if (sc->sc_proto != LAGG_PROTO_ROUNDROBIN) {
 1390                                 LAGG_XUNLOCK(sc);
 1391                                 error = EINVAL;
 1392                                 break;
 1393                         }
 1394                         sc->sc_stride = ro->ro_bkt;
 1395                 }
 1396                 if (ro->ro_opts == 0) {
 1397                         LAGG_XUNLOCK(sc);
 1398                         break;
 1399                 }
 1400 
 1401                 /*
 1402                  * Set options.  LACP options are stored in sc->sc_psc,
 1403                  * not in sc_opts.
 1404                  */
 1405                 int valid, lacp;
 1406 
 1407                 switch (ro->ro_opts) {
 1408                 case LAGG_OPT_USE_FLOWID:
 1409                 case -LAGG_OPT_USE_FLOWID:
 1410                 case LAGG_OPT_FLOWIDSHIFT:
 1411                 case LAGG_OPT_RR_LIMIT:
 1412                         valid = 1;
 1413                         lacp = 0;
 1414                         break;
 1415                 case LAGG_OPT_LACP_TXTEST:
 1416                 case -LAGG_OPT_LACP_TXTEST:
 1417                 case LAGG_OPT_LACP_RXTEST:
 1418                 case -LAGG_OPT_LACP_RXTEST:
 1419                 case LAGG_OPT_LACP_STRICT:
 1420                 case -LAGG_OPT_LACP_STRICT:
 1421                 case LAGG_OPT_LACP_FAST_TIMO:
 1422                 case -LAGG_OPT_LACP_FAST_TIMO:
 1423                         valid = lacp = 1;
 1424                         break;
 1425                 default:
 1426                         valid = lacp = 0;
 1427                         break;
 1428                 }
 1429 
 1430                 if (valid == 0 ||
 1431                     (lacp == 1 && sc->sc_proto != LAGG_PROTO_LACP)) {
 1432                         /* Invalid combination of options specified. */
 1433                         error = EINVAL;
 1434                         LAGG_XUNLOCK(sc);
 1435                         break;  /* Return from SIOCSLAGGOPTS. */
 1436                 }
 1437 
 1438                 /*
 1439                  * Store new options into sc->sc_opts except for
 1440                  * FLOWIDSHIFT, RR and LACP options.
 1441                  */
 1442                 if (lacp == 0) {
 1443                         if (ro->ro_opts == LAGG_OPT_FLOWIDSHIFT)
 1444                                 sc->flowid_shift = ro->ro_flowid_shift;
 1445                         else if (ro->ro_opts == LAGG_OPT_RR_LIMIT) {
 1446                                 if (sc->sc_proto != LAGG_PROTO_ROUNDROBIN ||
 1447                                     ro->ro_bkt == 0) {
 1448                                         error = EINVAL;
 1449                                         LAGG_XUNLOCK(sc);
 1450                                         break;
 1451                                 }
 1452                                 sc->sc_stride = ro->ro_bkt;
 1453                         } else if (ro->ro_opts > 0)
 1454                                 sc->sc_opts |= ro->ro_opts;
 1455                         else
 1456                                 sc->sc_opts &= ~ro->ro_opts;
 1457                 } else {
 1458                         struct lacp_softc *lsc;
 1459                         struct lacp_port *lp;
 1460 
 1461                         lsc = (struct lacp_softc *)sc->sc_psc;
 1462 
 1463                         switch (ro->ro_opts) {
 1464                         case LAGG_OPT_LACP_TXTEST:
 1465                                 lsc->lsc_debug.lsc_tx_test = 1;
 1466                                 break;
 1467                         case -LAGG_OPT_LACP_TXTEST:
 1468                                 lsc->lsc_debug.lsc_tx_test = 0;
 1469                                 break;
 1470                         case LAGG_OPT_LACP_RXTEST:
 1471                                 lsc->lsc_debug.lsc_rx_test = 1;
 1472                                 break;
 1473                         case -LAGG_OPT_LACP_RXTEST:
 1474                                 lsc->lsc_debug.lsc_rx_test = 0;
 1475                                 break;
 1476                         case LAGG_OPT_LACP_STRICT:
 1477                                 lsc->lsc_strict_mode = 1;
 1478                                 break;
 1479                         case -LAGG_OPT_LACP_STRICT:
 1480                                 lsc->lsc_strict_mode = 0;
 1481                                 break;
 1482                         case LAGG_OPT_LACP_FAST_TIMO:
 1483                                 LACP_LOCK(lsc);
 1484                                 LIST_FOREACH(lp, &lsc->lsc_ports, lp_next)
 1485                                         lp->lp_state |= LACP_STATE_TIMEOUT;
 1486                                 LACP_UNLOCK(lsc);
 1487                                 lsc->lsc_fast_timeout = 1;
 1488                                 break;
 1489                         case -LAGG_OPT_LACP_FAST_TIMO:
 1490                                 LACP_LOCK(lsc);
 1491                                 LIST_FOREACH(lp, &lsc->lsc_ports, lp_next)
 1492                                         lp->lp_state &= ~LACP_STATE_TIMEOUT;
 1493                                 LACP_UNLOCK(lsc);
 1494                                 lsc->lsc_fast_timeout = 0;
 1495                                 break;
 1496                         }
 1497                 }
 1498                 LAGG_XUNLOCK(sc);
 1499                 break;
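               /*
                * Illustrative userland usage (an editor's sketch, not part
                * of the original file): the options above are driven via
                * the SIOCSLAGGOPTS ioctl, e.g.
                *
                *      struct lagg_reqopts ro;
                *
                *      memset(&ro, 0, sizeof(ro));
                *      strlcpy(ro.ro_ifname, "lagg0", sizeof(ro.ro_ifname));
                *      ro.ro_opts = LAGG_OPT_LACP_STRICT;
                *      ioctl(s, SIOCSLAGGOPTS, &ro);
                *
                * where passing -LAGG_OPT_LACP_STRICT instead would clear
                * the flag again.
                */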
 1500         case SIOCGLAGGFLAGS:
 1501                 rf->rf_flags = 0;
 1502                 LAGG_XLOCK(sc);
 1503                 if (sc->sc_flags & MBUF_HASHFLAG_L2)
 1504                         rf->rf_flags |= LAGG_F_HASHL2;
 1505                 if (sc->sc_flags & MBUF_HASHFLAG_L3)
 1506                         rf->rf_flags |= LAGG_F_HASHL3;
 1507                 if (sc->sc_flags & MBUF_HASHFLAG_L4)
 1508                         rf->rf_flags |= LAGG_F_HASHL4;
 1509                 LAGG_XUNLOCK(sc);
 1510                 break;
 1511         case SIOCSLAGGHASH:
 1512                 error = priv_check(td, PRIV_NET_LAGG);
 1513                 if (error)
 1514                         break;
 1515                 if ((rf->rf_flags & LAGG_F_HASHMASK) == 0) {
 1516                         error = EINVAL;
 1517                         break;
 1518                 }
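                      /*
                       * The LAGG_F_HASHL{2,3,4} flags choose which header
                       * layers feed the load-balancing hash (see the
                       * m_ether_tcpip_hash() call in the loadbalance
                       * transmit path below).
                       */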
 1519                 LAGG_XLOCK(sc);
 1520                 sc->sc_flags = 0;
 1521                 if (rf->rf_flags & LAGG_F_HASHL2)
 1522                         sc->sc_flags |= MBUF_HASHFLAG_L2;
 1523                 if (rf->rf_flags & LAGG_F_HASHL3)
 1524                         sc->sc_flags |= MBUF_HASHFLAG_L3;
 1525                 if (rf->rf_flags & LAGG_F_HASHL4)
 1526                         sc->sc_flags |= MBUF_HASHFLAG_L4;
 1527                 LAGG_XUNLOCK(sc);
 1528                 break;
 1529         case SIOCGLAGGPORT:
 1530                 if (rp->rp_portname[0] == '\0' ||
 1531                     (tpif = ifunit_ref(rp->rp_portname)) == NULL) {
 1532                         error = EINVAL;
 1533                         break;
 1534                 }
 1535 
 1536                 LAGG_RLOCK();
 1537                 if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
 1538                     lp->lp_softc != sc) {
 1539                         error = ENOENT;
 1540                         LAGG_RUNLOCK();
 1541                         if_rele(tpif);
 1542                         break;
 1543                 }
 1544 
 1545                 lagg_port2req(lp, rp);
 1546                 LAGG_RUNLOCK();
 1547                 if_rele(tpif);
 1548                 break;
 1549         case SIOCSLAGGPORT:
 1550                 error = priv_check(td, PRIV_NET_LAGG);
 1551                 if (error)
 1552                         break;
 1553                 if (rp->rp_portname[0] == '\0' ||
 1554                     (tpif = ifunit_ref(rp->rp_portname)) == NULL) {
 1555                         error = EINVAL;
 1556                         break;
 1557                 }
 1558 #ifdef INET6
 1559                 /*
 1560                  * A lagg port interface should not have an inet6
 1561                  * address, because two interfaces with valid
 1562                  * link-local scope zones must not be merged in any
 1563                  * form.  This restriction is needed to prevent a
 1564                  * violation of the link-local scope zone.  Adding
 1565                  * a port that has inet6 addresses therefore
 1566                  * triggers removal of all inet6 addresses on that
 1567                  * member interface.
 1568                  */
 1569                 if (in6ifa_llaonifp(tpif)) {
 1570                         in6_ifdetach(tpif);
 1571                         if_printf(sc->sc_ifp,
 1572                             "IPv6 addresses on %s have been removed "
 1573                             "before adding it as a member to prevent "
 1574                             "IPv6 address scope violation.\n",
 1575                             tpif->if_xname);
 1576                 }
 1577 #endif
 1578                 oldmtu = ifp->if_mtu;
 1579                 LAGG_XLOCK(sc);
 1580                 error = lagg_port_create(sc, tpif);
 1581                 LAGG_XUNLOCK(sc);
 1582                 if_rele(tpif);
 1583 
 1584                 /*
 1585                  * LAGG MTU may change during addition of the first port.
 1586                  * If it did, do network layer specific procedure.
 1587                  */
 1588                 if (ifp->if_mtu != oldmtu) {
 1589 #ifdef INET6
 1590                         nd6_setmtu(ifp);
 1591 #endif
 1592                         rt_updatemtu(ifp);
 1593                 }
 1594 
 1595                 VLAN_CAPABILITIES(ifp);
 1596                 break;
 1597         case SIOCSLAGGDELPORT:
 1598                 error = priv_check(td, PRIV_NET_LAGG);
 1599                 if (error)
 1600                         break;
 1601                 if (rp->rp_portname[0] == '\0' ||
 1602                     (tpif = ifunit_ref(rp->rp_portname)) == NULL) {
 1603                         error = EINVAL;
 1604                         break;
 1605                 }
 1606 
 1607                 LAGG_XLOCK(sc);
 1608                 if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
 1609                     lp->lp_softc != sc) {
 1610                         error = ENOENT;
 1611                         LAGG_XUNLOCK(sc);
 1612                         if_rele(tpif);
 1613                         break;
 1614                 }
 1615 
 1616                 error = lagg_port_destroy(lp, 1);
 1617                 LAGG_XUNLOCK(sc);
 1618                 if_rele(tpif);
 1619                 VLAN_CAPABILITIES(ifp);
 1620                 break;
 1621         case SIOCSIFFLAGS:
 1622                 /* Set flags on ports too */
 1623                 LAGG_XLOCK(sc);
 1624                 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1625                         lagg_setflags(lp, 1);
 1626                 }
 1627 
 1628                 if (!(ifp->if_flags & IFF_UP) &&
 1629                     (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 1630                         /*
 1631                          * If interface is marked down and it is running,
 1632                          * then stop and disable it.
 1633                          */
 1634                         lagg_stop(sc);
 1635                         LAGG_XUNLOCK(sc);
 1636                 } else if ((ifp->if_flags & IFF_UP) &&
 1637                     !(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 1638                         /*
 1639                          * If interface is marked up and it is stopped, then
 1640                          * start it.
 1641                          */
 1642                         LAGG_XUNLOCK(sc);
 1643                         (*ifp->if_init)(sc);
 1644                 } else
 1645                         LAGG_XUNLOCK(sc);
 1646                 break;
 1647         case SIOCADDMULTI:
 1648         case SIOCDELMULTI:
 1649                 LAGG_XLOCK(sc);
 1650                 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1651                         lagg_clrmulti(lp);
 1652                         lagg_setmulti(lp);
 1653                 }
 1654                 LAGG_XUNLOCK(sc);
 1655                 error = 0;
 1656                 break;
 1657         case SIOCSIFMEDIA:
 1658         case SIOCGIFMEDIA:
 1659                 if (ifp->if_type == IFT_INFINIBAND)
 1660                         error = EINVAL;
 1661                 else
 1662                         error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
 1663                 break;
 1664 
 1665         case SIOCSIFCAP:
 1666                 LAGG_XLOCK(sc);
 1667                 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1668                         if (lp->lp_ioctl != NULL)
 1669                                 (*lp->lp_ioctl)(lp->lp_ifp, cmd, data);
 1670                 }
 1671                 lagg_capabilities(sc);
 1672                 LAGG_XUNLOCK(sc);
 1673                 VLAN_CAPABILITIES(ifp);
 1674                 error = 0;
 1675                 break;
 1676 
 1677         case SIOCSIFMTU:
 1678                 LAGG_XLOCK(sc);
 1679                 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1680                         if (lp->lp_ioctl != NULL)
 1681                                 error = (*lp->lp_ioctl)(lp->lp_ifp, cmd, data);
 1682                         else
 1683                                 error = EINVAL;
 1684                         if (error != 0) {
 1685                                 if_printf(ifp,
 1686                                     "failed to change MTU to %d on port %s, "
 1687                                     "reverting all ports to original MTU (%d)\n",
 1688                                     ifr->ifr_mtu, lp->lp_ifp->if_xname, ifp->if_mtu);
 1689                                 break;
 1690                         }
 1691                 }
 1692                 if (error == 0) {
 1693                         ifp->if_mtu = ifr->ifr_mtu;
 1694                 } else {
 1695                         /* set every port back to the original MTU */
 1696                         ifr->ifr_mtu = ifp->if_mtu;
 1697                         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1698                                 if (lp->lp_ioctl != NULL)
 1699                                         (*lp->lp_ioctl)(lp->lp_ifp, cmd, data);
 1700                         }
 1701                 }
 1702                 LAGG_XUNLOCK(sc);
 1703                 break;
 1704 
 1705         default:
 1706                 error = ether_ioctl(ifp, cmd, data);
 1707                 break;
 1708         }
 1709         return (error);
 1710 }
 1711 
 1712 #ifdef RATELIMIT
 1713 static int
 1714 lagg_snd_tag_alloc(struct ifnet *ifp,
 1715     union if_snd_tag_alloc_params *params,
 1716     struct m_snd_tag **ppmt)
 1717 {
 1718         struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 1719         struct lagg_port *lp;
 1720         struct lagg_lb *lb;
 1721         uint32_t p;
 1722 
 1723         LAGG_RLOCK();
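              /*
               * Pick the egress port much as the transmit path would, so
               * that the rate-limit tag lands on the physical interface
               * that will carry the flow.  For the hashing protocols only
               * flowid-based selection is supported here; requests without
               * a flowid are rejected with EOPNOTSUPP.
               */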
 1724         switch (sc->sc_proto) {
 1725         case LAGG_PROTO_FAILOVER:
 1726                 lp = lagg_link_active(sc, sc->sc_primary);
 1727                 break;
 1728         case LAGG_PROTO_LOADBALANCE:
 1729                 if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 ||
 1730                     params->hdr.flowtype == M_HASHTYPE_NONE) {
 1731                         LAGG_RUNLOCK();
 1732                         return (EOPNOTSUPP);
 1733                 }
 1734                 p = params->hdr.flowid >> sc->flowid_shift;
 1735                 p %= sc->sc_count;
 1736                 lb = (struct lagg_lb *)sc->sc_psc;
 1737                 lp = lb->lb_ports[p];
 1738                 lp = lagg_link_active(sc, lp);
 1739                 break;
 1740         case LAGG_PROTO_LACP:
 1741                 if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 ||
 1742                     params->hdr.flowtype == M_HASHTYPE_NONE) {
 1743                         LAGG_RUNLOCK();
 1744                         return (EOPNOTSUPP);
 1745                 }
 1746                 lp = lacp_select_tx_port_by_hash(sc, params->hdr.flowid);
 1747                 break;
 1748         default:
 1749                 LAGG_RUNLOCK();
 1750                 return (EOPNOTSUPP);
 1751         }
 1752         if (lp == NULL) {
 1753                 LAGG_RUNLOCK();
 1754                 return (EOPNOTSUPP);
 1755         }
 1756         ifp = lp->lp_ifp;
 1757         LAGG_RUNLOCK();
 1758         if (ifp == NULL || ifp->if_snd_tag_alloc == NULL ||
 1759             (ifp->if_capenable & IFCAP_TXRTLMT) == 0)
 1760                 return (EOPNOTSUPP);
 1761 
 1762         /* forward allocation request */
 1763         return (ifp->if_snd_tag_alloc(ifp, params, ppmt));
 1764 }
 1765 #endif
 1766 
 1767 static int
 1768 lagg_setmulti(struct lagg_port *lp)
 1769 {
 1770         struct lagg_softc *sc = lp->lp_softc;
 1771         struct ifnet *ifp = lp->lp_ifp;
 1772         struct ifnet *scifp = sc->sc_ifp;
 1773         struct lagg_mc *mc;
 1774         struct ifmultiaddr *ifma;
 1775         int error;
 1776 
 1777         IF_ADDR_WLOCK(scifp);
 1778         CK_STAILQ_FOREACH(ifma, &scifp->if_multiaddrs, ifma_link) {
 1779                 if (ifma->ifma_addr->sa_family != AF_LINK)
 1780                         continue;
 1781                 mc = malloc(sizeof(struct lagg_mc), M_LAGG, M_NOWAIT);
 1782                 if (mc == NULL) {
 1783                         IF_ADDR_WUNLOCK(scifp);
 1784                         return (ENOMEM);
 1785                 }
 1786                 bcopy(ifma->ifma_addr, &mc->mc_addr,
 1787                     ifma->ifma_addr->sa_len);
 1788                 mc->mc_addr.sdl_index = ifp->if_index;
 1789                 mc->mc_ifma = NULL;
 1790                 SLIST_INSERT_HEAD(&lp->lp_mc_head, mc, mc_entries);
 1791         }
 1792         IF_ADDR_WUNLOCK(scifp);
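              /*
               * The memberships were collected under the lagg interface's
               * address lock above; if_addmulti() runs only after that
               * lock is dropped, because it takes the port's address lock
               * itself.
               */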
 1793         SLIST_FOREACH(mc, &lp->lp_mc_head, mc_entries) {
 1794                 error = if_addmulti(ifp,
 1795                     (struct sockaddr *)&mc->mc_addr, &mc->mc_ifma);
 1796                 if (error)
 1797                         return (error);
 1798         }
 1799         return (0);
 1800 }
 1801 
 1802 static int
 1803 lagg_clrmulti(struct lagg_port *lp)
 1804 {
 1805         struct lagg_mc *mc;
 1806 
 1807         LAGG_XLOCK_ASSERT(lp->lp_softc);
 1808         while ((mc = SLIST_FIRST(&lp->lp_mc_head)) != NULL) {
 1809                 SLIST_REMOVE(&lp->lp_mc_head, mc, lagg_mc, mc_entries);
 1810                 if (mc->mc_ifma && lp->lp_detaching == 0)
 1811                         if_delmulti_ifma(mc->mc_ifma);
 1812                 free(mc, M_LAGG);
 1813         }
 1814         return (0);
 1815 }
 1816 
 1817 static int
 1818 lagg_setcaps(struct lagg_port *lp, int cap)
 1819 {
 1820         struct ifreq ifr;
 1821 
 1822         if (lp->lp_ifp->if_capenable == cap)
 1823                 return (0);
 1824         if (lp->lp_ioctl == NULL)
 1825                 return (ENXIO);
 1826         ifr.ifr_reqcap = cap;
 1827         return ((*lp->lp_ioctl)(lp->lp_ifp, SIOCSIFCAP, (caddr_t)&ifr));
 1828 }
 1829 
 1830 /* Handle a ref-counted flag that should be set on the lagg port as well */
 1831 static int
 1832 lagg_setflag(struct lagg_port *lp, int flag, int status,
 1833     int (*func)(struct ifnet *, int))
 1834 {
 1835         struct lagg_softc *sc = lp->lp_softc;
 1836         struct ifnet *scifp = sc->sc_ifp;
 1837         struct ifnet *ifp = lp->lp_ifp;
 1838         int error;
 1839 
 1840         LAGG_XLOCK_ASSERT(sc);
 1841 
 1842         status = status ? (scifp->if_flags & flag) : 0;
 1843         /* Now "status" contains the flag value or 0 */
 1844 
 1845         /*
 1846          * See if recorded ports status is different from what
 1847          * we want it to be.  If it is, flip it.  We record ports
 1848          * status in lp_ifflags so that we won't clear ports flag
 1849          * we haven't set.  In fact, we don't clear or set ports
 1850          * flags directly, but get or release references to them.
 1851          * That's why we can be sure that recorded flags still are
 1852          * in accord with actual ports flags.
 1853          */
 1854         if (status != (lp->lp_ifflags & flag)) {
 1855                 error = (*func)(ifp, status);
 1856                 if (error)
 1857                         return (error);
 1858                 lp->lp_ifflags &= ~flag;
 1859                 lp->lp_ifflags |= status;
 1860         }
 1861         return (0);
 1862 }
 1863 
 1864 /*
 1865  * Handle IFF_* flags that require certain changes on the lagg port:
 1866  * if "status" is true, update the port's flags to match the lagg;
 1867  * if "status" is false, forcibly clear the flags set on the port.
 1868  */
 1869 static int
 1870 lagg_setflags(struct lagg_port *lp, int status)
 1871 {
 1872         int error, i;
 1873 
 1874         for (i = 0; lagg_pflags[i].flag; i++) {
 1875                 error = lagg_setflag(lp, lagg_pflags[i].flag,
 1876                     status, lagg_pflags[i].func);
 1877                 if (error)
 1878                         return (error);
 1879         }
 1880         return (0);
 1881 }
 1882 
 1883 static int
 1884 lagg_transmit_ethernet(struct ifnet *ifp, struct mbuf *m)
 1885 {
 1886         struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 1887         int error;
 1888 
 1889         LAGG_RLOCK();
 1890         /* We need a Tx algorithm and at least one port */
 1891         if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
 1892                 LAGG_RUNLOCK();
 1893                 m_freem(m);
 1894                 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 1895                 return (ENXIO);
 1896         }
 1897 
 1898         ETHER_BPF_MTAP(ifp, m);
 1899 
 1900         error = lagg_proto_start(sc, m);
 1901         LAGG_RUNLOCK();
 1902 
 1903         if (error != 0)
 1904                 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 1905 
 1906         return (error);
 1907 }
 1908 
 1909 static int
 1910 lagg_transmit_infiniband(struct ifnet *ifp, struct mbuf *m)
 1911 {
 1912         struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 1913         int error;
 1914 
 1915 #if defined(KERN_TLS) || defined(RATELIMIT)
 1916         if (m->m_pkthdr.csum_flags & CSUM_SND_TAG)
 1917                 MPASS(m->m_pkthdr.snd_tag->ifp == ifp);
 1918 #endif
 1919         LAGG_RLOCK();
 1920         /* We need a Tx algorithm and at least one port */
 1921         if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
 1922                 LAGG_RUNLOCK();
 1923                 m_freem(m);
 1924                 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 1925                 return (ENXIO);
 1926         }
 1927 
 1928         INFINIBAND_BPF_MTAP(ifp, m);
 1929 
 1930         error = lagg_proto_start(sc, m);
 1931         LAGG_RUNLOCK();
 1932         return (error);
 1933 }
 1934 
 1935 /*
 1936  * The ifp->if_qflush entry point for lagg(4) is a no-op.
 1937  */
 1938 static void
 1939 lagg_qflush(struct ifnet *ifp __unused)
 1940 {
 1941 }
 1942 
 1943 static struct mbuf *
 1944 lagg_input_ethernet(struct ifnet *ifp, struct mbuf *m)
 1945 {
 1946         struct lagg_port *lp = ifp->if_lagg;
 1947         struct lagg_softc *sc = lp->lp_softc;
 1948         struct ifnet *scifp = sc->sc_ifp;
 1949 
 1950         LAGG_RLOCK();
 1951         if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
 1952             lp->lp_detaching != 0 ||
 1953             sc->sc_proto == LAGG_PROTO_NONE) {
 1954                 LAGG_RUNLOCK();
 1955                 m_freem(m);
 1956                 return (NULL);
 1957         }
 1958 
 1959         ETHER_BPF_MTAP(scifp, m);
 1960 
 1961         m = lagg_proto_input(sc, lp, m);
 1962         if (m != NULL && (scifp->if_flags & IFF_MONITOR) != 0) {
 1963                 m_freem(m);
 1964                 m = NULL;
 1965         }
 1966 
 1967         LAGG_RUNLOCK();
 1968         return (m);
 1969 }
 1970 
 1971 static struct mbuf *
 1972 lagg_input_infiniband(struct ifnet *ifp, struct mbuf *m)
 1973 {
 1974         struct lagg_port *lp = ifp->if_lagg;
 1975         struct lagg_softc *sc = lp->lp_softc;
 1976         struct ifnet *scifp = sc->sc_ifp;
 1977 
 1978         LAGG_RLOCK();
 1979         if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
 1980             lp->lp_detaching != 0 ||
 1981             sc->sc_proto == LAGG_PROTO_NONE) {
 1982                 LAGG_RUNLOCK();
 1983                 m_freem(m);
 1984                 return (NULL);
 1985         }
 1986 
 1987         INFINIBAND_BPF_MTAP(scifp, m);
 1988 
 1989         m = lagg_proto_input(sc, lp, m);
 1990         if (m != NULL && (scifp->if_flags & IFF_MONITOR) != 0) {
 1991                 m_freem(m);
 1992                 m = NULL;
 1993         }
 1994 
 1995         LAGG_RUNLOCK();
 1996         return (m);
 1997 }
 1998 
 1999 static int
 2000 lagg_media_change(struct ifnet *ifp)
 2001 {
 2002         struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 2003 
 2004         if (sc->sc_ifflags & IFF_DEBUG)
 2005                 printf("%s\n", __func__);
 2006 
 2007         /* Ignore */
 2008         return (0);
 2009 }
 2010 
 2011 static void
 2012 lagg_media_status(struct ifnet *ifp, struct ifmediareq *imr)
 2013 {
 2014         struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 2015         struct lagg_port *lp;
 2016 
 2017         imr->ifm_status = IFM_AVALID;
 2018         imr->ifm_active = IFM_ETHER | IFM_AUTO;
 2019 
 2020         LAGG_RLOCK();
 2021         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 2022                 if (LAGG_PORTACTIVE(lp))
 2023                         imr->ifm_status |= IFM_ACTIVE;
 2024         }
 2025         LAGG_RUNLOCK();
 2026 }
 2027 
 2028 static void
 2029 lagg_linkstate(struct lagg_softc *sc)
 2030 {
 2031         struct lagg_port *lp;
 2032         int new_link = LINK_STATE_DOWN;
 2033         uint64_t speed;
 2034 
 2035         LAGG_XLOCK_ASSERT(sc);
 2036 
 2037         /* LACP handles link state itself */
 2038         if (sc->sc_proto == LAGG_PROTO_LACP)
 2039                 return;
 2040 
 2041         /* Our link is considered up if at least one of our ports is active */
 2042         LAGG_RLOCK();
 2043         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 2044                 if (lp->lp_ifp->if_link_state == LINK_STATE_UP) {
 2045                         new_link = LINK_STATE_UP;
 2046                         break;
 2047                 }
 2048         }
 2049         LAGG_RUNLOCK();
 2050         if_link_state_change(sc->sc_ifp, new_link);
 2051 
 2052         /* Update if_baudrate to reflect the max possible speed */
 2053         switch (sc->sc_proto) {
 2054                 case LAGG_PROTO_FAILOVER:
 2055                         sc->sc_ifp->if_baudrate = sc->sc_primary != NULL ?
 2056                             sc->sc_primary->lp_ifp->if_baudrate : 0;
 2057                         break;
 2058                 case LAGG_PROTO_ROUNDROBIN:
 2059                 case LAGG_PROTO_LOADBALANCE:
 2060                 case LAGG_PROTO_BROADCAST:
 2061                         speed = 0;
 2062                         LAGG_RLOCK();
 2063                         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 2064                                 speed += lp->lp_ifp->if_baudrate;
 2065                         LAGG_RUNLOCK();
 2066                         sc->sc_ifp->if_baudrate = speed;
 2067                         break;
 2068                 case LAGG_PROTO_LACP:
 2069                         /* LACP updates if_baudrate itself */
 2070                         break;
 2071         }
 2072 }
 2073 
 2074 static void
 2075 lagg_port_state(struct ifnet *ifp, int state)
 2076 {
 2077         struct lagg_port *lp = (struct lagg_port *)ifp->if_lagg;
 2078         struct lagg_softc *sc = NULL;
 2079 
 2080         if (lp != NULL)
 2081                 sc = lp->lp_softc;
 2082         if (sc == NULL)
 2083                 return;
 2084 
 2085         LAGG_XLOCK(sc);
 2086         lagg_linkstate(sc);
 2087         lagg_proto_linkstate(sc, lp);
 2088         LAGG_XUNLOCK(sc);
 2089 }
 2090 
 2091 struct lagg_port *
 2092 lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp)
 2093 {
 2094         struct lagg_port *lp_next, *rval = NULL;
 2095 
 2096         /*
 2097          * Search for a port that reports an active link state.
 2098          */
 2099 
 2100 #ifdef INVARIANTS
 2101         /*
 2102          * This is called with either LAGG_RLOCK() held or
 2103          * LAGG_XLOCK(sc) held.
 2104          */
 2105         if (!in_epoch(net_epoch_preempt))
 2106                 LAGG_XLOCK_ASSERT(sc);
 2107 #endif
 2108 
 2109         if (lp == NULL)
 2110                 goto search;
 2111         if (LAGG_PORTACTIVE(lp)) {
 2112                 rval = lp;
 2113                 goto found;
 2114         }
 2115         if ((lp_next = CK_SLIST_NEXT(lp, lp_entries)) != NULL &&
 2116             LAGG_PORTACTIVE(lp_next)) {
 2117                 rval = lp_next;
 2118                 goto found;
 2119         }
 2120 
 2121 search:
 2122         CK_SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
 2123                 if (LAGG_PORTACTIVE(lp_next)) {
 2124                         return (lp_next);
 2125                 }
 2126         }
 2127 found:
 2128         return (rval);
 2129 }
 2130 
 2131 int
 2132 lagg_enqueue(struct ifnet *ifp, struct mbuf *m)
 2133 {
 2134 
 2135         return (ifp->if_transmit)(ifp, m);
 2136 }
 2137 
 2138 /*
 2139  * Simple round robin aggregation
 2140  */
 2141 static void
 2142 lagg_rr_attach(struct lagg_softc *sc)
 2143 {
 2144         sc->sc_seq = 0;
 2145         sc->sc_stride = 1;
 2146 }
 2147 
 2148 static int
 2149 lagg_rr_start(struct lagg_softc *sc, struct mbuf *m)
 2150 {
 2151         struct lagg_port *lp;
 2152         uint32_t p;
 2153 
 2154         p = atomic_fetchadd_32(&sc->sc_seq, 1);
 2155         p /= sc->sc_stride;
 2156         p %= sc->sc_count;
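              /*
               * Example: with sc_stride = 2 and three ports, sequence
               * numbers 0,1,2,3,4,5,... select ports 0,0,1,1,2,2 before
               * wrapping, i.e. each port sends sc_stride packets per turn.
               */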
 2157         lp = CK_SLIST_FIRST(&sc->sc_ports);
 2158 
 2159         while (p--)
 2160                 lp = CK_SLIST_NEXT(lp, lp_entries);
 2161 
 2162         /*
 2163          * Check the port's link state; lagg_link_active() returns the
 2164          * next active port if this link is down or the port is NULL.
 2165          */
 2166         if ((lp = lagg_link_active(sc, lp)) == NULL) {
 2167                 m_freem(m);
 2168                 return (ENETDOWN);
 2169         }
 2170 
 2171         /* Send mbuf */
 2172         return (lagg_enqueue(lp->lp_ifp, m));
 2173 }
 2174 
 2175 static struct mbuf *
 2176 lagg_rr_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
 2177 {
 2178         struct ifnet *ifp = sc->sc_ifp;
 2179 
 2180         /* Just pass in the packet to our lagg device */
 2181         m->m_pkthdr.rcvif = ifp;
 2182 
 2183         return (m);
 2184 }
 2185 
 2186 /*
 2187  * Broadcast mode
 2188  */
 2189 static int
 2190 lagg_bcast_start(struct lagg_softc *sc, struct mbuf *m)
 2191 {
 2192         int errors = 0;
 2193         int ret;
 2194         struct lagg_port *lp, *last = NULL;
 2195         struct mbuf *m0;
 2196 
 2197         LAGG_RLOCK_ASSERT();
 2198         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 2199                 if (!LAGG_PORTACTIVE(lp))
 2200                         continue;
 2201 
 2202                 if (last != NULL) {
 2203                         m0 = m_copym(m, 0, M_COPYALL, M_NOWAIT);
 2204                         if (m0 == NULL) {
 2205                                 ret = ENOBUFS;
 2206                                 errors++;
 2207                                 break;
 2208                         }
 2209 
 2210                         ret = lagg_enqueue(last->lp_ifp, m0);
 2211                         if (ret != 0)
 2212                                 errors++;
 2213                 }
 2214                 last = lp;
 2215         }
 2216 
 2217         if (last == NULL) {
 2218                 m_freem(m);
 2219                 return (ENOENT);
 2220         }
 2221         if ((last = lagg_link_active(sc, last)) == NULL) {
 2222                 m_freem(m);
 2223                 return (ENETDOWN);
 2224         }
 2225 
 2226         ret = lagg_enqueue(last->lp_ifp, m);
 2227         if (ret != 0)
 2228                 errors++;
 2229 
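              /*
               * Individual enqueue failures are absorbed here: once at
               * least one active port existed, the broadcast reports
               * success; only the ENOENT/ENETDOWN cases above propagate
               * an error to the caller.
               */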
 2230         if (errors == 0)
 2231                 return (ret);
 2232 
 2233         return (0);
 2234 }
 2235 
 2236 static struct mbuf *
 2237 lagg_bcast_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
 2238 {
 2239         struct ifnet *ifp = sc->sc_ifp;
 2240 
 2241         /* Just pass in the packet to our lagg device */
 2242         m->m_pkthdr.rcvif = ifp;
 2243         return (m);
 2244 }
 2245 
 2246 /*
 2247  * Active failover
 2248  */
 2249 static int
 2250 lagg_fail_start(struct lagg_softc *sc, struct mbuf *m)
 2251 {
 2252         struct lagg_port *lp;
 2253 
 2254         /* Use the master port if active or the next available port */
 2255         if ((lp = lagg_link_active(sc, sc->sc_primary)) == NULL) {
 2256                 m_freem(m);
 2257                 return (ENETDOWN);
 2258         }
 2259 
 2260         /* Send mbuf */
 2261         return (lagg_enqueue(lp->lp_ifp, m));
 2262 }
 2263 
 2264 static struct mbuf *
 2265 lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
 2266 {
 2267         struct ifnet *ifp = sc->sc_ifp;
 2268         struct lagg_port *tmp_tp;
 2269 
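              /*
               * Traffic on the primary port is always accepted; traffic
               * on other ports is accepted when the
               * net.link.lagg.failover_rx_all sysctl allows it, or when
               * the primary link is down (handled below).
               */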
 2270         if (lp == sc->sc_primary || V_lagg_failover_rx_all) {
 2271                 m->m_pkthdr.rcvif = ifp;
 2272                 return (m);
 2273         }
 2274 
 2275         if (!LAGG_PORTACTIVE(sc->sc_primary)) {
 2276                 tmp_tp = lagg_link_active(sc, sc->sc_primary);
 2277                 /*
 2278                  * If tmp_tp is NULL, we've received a packet when all
 2279                  * our links are down.  Weird, but process it anyway.
 2280                  */
 2281                 if ((tmp_tp == NULL || tmp_tp == lp)) {
 2282                         m->m_pkthdr.rcvif = ifp;
 2283                         return (m);
 2284                 }
 2285         }
 2286 
 2287         m_freem(m);
 2288         return (NULL);
 2289 }
 2290 
 2291 /*
 2292  * Loadbalancing
 2293  */
 2294 static void
 2295 lagg_lb_attach(struct lagg_softc *sc)
 2296 {
 2297         struct lagg_port *lp;
 2298         struct lagg_lb *lb;
 2299 
 2300         LAGG_XLOCK_ASSERT(sc);
 2301         lb = malloc(sizeof(struct lagg_lb), M_LAGG, M_WAITOK | M_ZERO);
 2302         lb->lb_key = m_ether_tcpip_hash_init();
 2303         sc->sc_psc = lb;
 2304 
 2305         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 2306                 lagg_lb_port_create(lp);
 2307 }
 2308 
 2309 static void
 2310 lagg_lb_detach(struct lagg_softc *sc)
 2311 {
 2312         struct lagg_lb *lb;
 2313 
 2314         lb = (struct lagg_lb *)sc->sc_psc;
 2315         if (lb != NULL)
 2316                 free(lb, M_LAGG);
 2317 }
 2318 
 2319 static int
 2320 lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp)
 2321 {
 2322         struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
 2323         struct lagg_port *lp_next;
 2324         int i = 0, rv;
 2325 
 2326         rv = 0;
 2327         bzero(&lb->lb_ports, sizeof(lb->lb_ports));
 2328         LAGG_XLOCK_ASSERT(sc);
 2329         CK_SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
 2330                 if (lp_next == lp)
 2331                         continue;
 2332                 if (i >= LAGG_MAX_PORTS) {
 2333                         rv = EINVAL;
 2334                         break;
 2335                 }
 2336                 if (sc->sc_ifflags & IFF_DEBUG)
 2337                         printf("%s: port %s at index %d\n",
 2338                             sc->sc_ifname, lp_next->lp_ifp->if_xname, i);
 2339                 lb->lb_ports[i++] = lp_next;
 2340         }
 2341 
 2342         return (rv);
 2343 }
 2344 
 2345 static int
 2346 lagg_lb_port_create(struct lagg_port *lp)
 2347 {
 2348         struct lagg_softc *sc = lp->lp_softc;
 2349         return (lagg_lb_porttable(sc, NULL));
 2350 }
 2351 
 2352 static void
 2353 lagg_lb_port_destroy(struct lagg_port *lp)
 2354 {
 2355         struct lagg_softc *sc = lp->lp_softc;
 2356         lagg_lb_porttable(sc, lp);
 2357 }
 2358 
 2359 static int
 2360 lagg_lb_start(struct lagg_softc *sc, struct mbuf *m)
 2361 {
 2362         struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
 2363         struct lagg_port *lp = NULL;
 2364         uint32_t p = 0;
 2365 
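              /*
               * Prefer the hardware-supplied flowid when the administrator
               * enabled LAGG_OPT_USE_FLOWID and the mbuf carries a hash;
               * otherwise fall back to a software hash over the headers
               * selected by the L2/L3/L4 hash flags.
               */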
 2366         if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) &&
 2367             M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
 2368                 p = m->m_pkthdr.flowid >> sc->flowid_shift;
 2369         else
 2370                 p = m_ether_tcpip_hash(sc->sc_flags, m, lb->lb_key);
 2371         p %= sc->sc_count;
 2372         lp = lb->lb_ports[p];
 2373 
 2374         /*
 2375          * Check the port's link state; lagg_link_active() returns the
 2376          * next active port if this link is down or the port is NULL.
 2377          */
 2378         if ((lp = lagg_link_active(sc, lp)) == NULL) {
 2379                 m_freem(m);
 2380                 return (ENETDOWN);
 2381         }
 2382 
 2383         /* Send mbuf */
 2384         return (lagg_enqueue(lp->lp_ifp, m));
 2385 }
 2386 
 2387 static struct mbuf *
 2388 lagg_lb_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
 2389 {
 2390         struct ifnet *ifp = sc->sc_ifp;
 2391 
 2392         /* Just pass in the packet to our lagg device */
 2393         m->m_pkthdr.rcvif = ifp;
 2394 
 2395         return (m);
 2396 }
 2397 
 2398 /*
 2399  * 802.3ad LACP
 2400  */
 2401 static void
 2402 lagg_lacp_attach(struct lagg_softc *sc)
 2403 {
 2404         struct lagg_port *lp;
 2405 
 2406         lacp_attach(sc);
 2407         LAGG_XLOCK_ASSERT(sc);
 2408         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 2409                 lacp_port_create(lp);
 2410 }
 2411 
 2412 static void
 2413 lagg_lacp_detach(struct lagg_softc *sc)
 2414 {
 2415         struct lagg_port *lp;
 2416         void *psc;
 2417 
 2418         LAGG_XLOCK_ASSERT(sc);
 2419         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 2420                 lacp_port_destroy(lp);
 2421 
 2422         psc = sc->sc_psc;
 2423         sc->sc_psc = NULL;
 2424         lacp_detach(psc);
 2425 }
 2426 
 2427 static void
 2428 lagg_lacp_lladdr(struct lagg_softc *sc)
 2429 {
 2430         struct lagg_port *lp;
 2431 
 2432         LAGG_SXLOCK_ASSERT(sc);
 2433 
 2434         /* purge all the lacp ports */
 2435         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 2436                 lacp_port_destroy(lp);
 2437 
 2438         /* add them back in */
 2439         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 2440                 lacp_port_create(lp);
 2441 }
 2442 
 2443 static int
 2444 lagg_lacp_start(struct lagg_softc *sc, struct mbuf *m)
 2445 {
 2446         struct lagg_port *lp;
 2447 
 2448         lp = lacp_select_tx_port(sc, m);
 2449         if (lp == NULL) {
 2450                 m_freem(m);
 2451                 return (ENETDOWN);
 2452         }
 2453 
 2454         /* Send mbuf */
 2455         return (lagg_enqueue(lp->lp_ifp, m));
 2456 }
 2457 
 2458 static struct mbuf *
 2459 lagg_lacp_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
 2460 {
 2461         struct ifnet *ifp = sc->sc_ifp;
 2462         struct ether_header *eh;
 2463         u_short etype;
 2464 
 2465         eh = mtod(m, struct ether_header *);
 2466         etype = ntohs(eh->ether_type);
 2467 
 2468         /* Tap off LACP control messages */
 2469         if ((m->m_flags & M_VLANTAG) == 0 && etype == ETHERTYPE_SLOW) {
 2470                 m = lacp_input(lp, m);
 2471                 if (m == NULL)
 2472                         return (NULL);
 2473         }
 2474 
 2475         /*
 2476          * If the port is not collecting or not in the active aggregator then
 2477          * free and return.
 2478          */
 2479         if (lacp_iscollecting(lp) == 0 || lacp_isactive(lp) == 0) {
 2480                 m_freem(m);
 2481                 return (NULL);
 2482         }
 2483 
 2484         m->m_pkthdr.rcvif = ifp;
 2485         return (m);
 2486 }
 2487 
