FreeBSD/Linux Kernel Cross Reference
sys/net/if_lagg.c


    1 /*      $OpenBSD: if_trunk.c,v 1.30 2007/01/31 06:20:19 reyk Exp $      */
    2 
    3 /*
    4  * Copyright (c) 2005, 2006 Reyk Floeter <reyk@openbsd.org>
    5  * Copyright (c) 2007 Andrew Thompson <thompsa@FreeBSD.org>
    6  * Copyright (c) 2014, 2016 Marcelo Araujo <araujo@FreeBSD.org>
    7  *
    8  * Permission to use, copy, modify, and distribute this software for any
    9  * purpose with or without fee is hereby granted, provided that the above
   10  * copyright notice and this permission notice appear in all copies.
   11  *
   12  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
   13  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
   14  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
   15  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
   16  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
   17  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
   18  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
   19  */
   20 
   21 #include <sys/cdefs.h>
   22 __FBSDID("$FreeBSD$");
   23 
   24 #include "opt_inet.h"
   25 #include "opt_inet6.h"
   26 
   27 #include <sys/param.h>
   28 #include <sys/kernel.h>
   29 #include <sys/malloc.h>
   30 #include <sys/mbuf.h>
   31 #include <sys/queue.h>
   32 #include <sys/socket.h>
   33 #include <sys/sockio.h>
   34 #include <sys/sysctl.h>
   35 #include <sys/module.h>
   36 #include <sys/priv.h>
   37 #include <sys/systm.h>
   38 #include <sys/proc.h>
   39 #include <sys/lock.h>
   40 #include <sys/rmlock.h>
   41 #include <sys/sx.h>
   42 #include <sys/taskqueue.h>
   43 #include <sys/eventhandler.h>
   44 
   45 #include <net/ethernet.h>
   46 #include <net/if.h>
   47 #include <net/if_clone.h>
   48 #include <net/if_arp.h>
   49 #include <net/if_dl.h>
   50 #include <net/if_media.h>
   51 #include <net/if_types.h>
   52 #include <net/if_var.h>
   53 #include <net/bpf.h>
   54 #include <net/vnet.h>
   55 
   56 #if defined(INET) || defined(INET6)
   57 #include <netinet/in.h>
   58 #include <netinet/ip.h>
   59 #endif
   60 #ifdef INET
   61 #include <netinet/in_systm.h>
   62 #include <netinet/if_ether.h>
   63 #endif
   64 
   65 #ifdef INET6
   66 #include <netinet/ip6.h>
   67 #include <netinet6/in6_var.h>
   68 #include <netinet6/in6_ifattach.h>
   69 #endif
   70 
   71 #include <net/if_vlan_var.h>
   72 #include <net/if_lagg.h>
   73 #include <net/ieee8023ad_lacp.h>
   74 
   75 /* Special flags we should propagate to the lagg ports. */
   76 static struct {
   77         int flag;
   78         int (*func)(struct ifnet *, int);
   79 } lagg_pflags[] = {
   80         {IFF_PROMISC, ifpromisc},
   81         {IFF_ALLMULTI, if_allmulti},
   82         {0, NULL}
   83 };
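      /*
       * For example, lagg_setflags() (defined below) can propagate these
       * flags by walking the table, roughly:
       *
       *	for (i = 0; lagg_pflags[i].flag != 0; i++)
       *		lagg_setflag(lp, lagg_pflags[i].flag, status,
       *		    lagg_pflags[i].func);
       *
       * so supporting another propagated flag only requires a new table
       * entry and its corresponding if_*() handler.
       */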
   84 
   85 VNET_DEFINE(SLIST_HEAD(__trhead, lagg_softc), lagg_list); /* list of laggs */
   86 #define V_lagg_list     VNET(lagg_list)
   87 static VNET_DEFINE(struct mtx, lagg_list_mtx);
   88 #define V_lagg_list_mtx VNET(lagg_list_mtx)
   89 #define LAGG_LIST_LOCK_INIT(x)          mtx_init(&V_lagg_list_mtx, \
   90                                         "if_lagg list", NULL, MTX_DEF)
   91 #define LAGG_LIST_LOCK_DESTROY(x)       mtx_destroy(&V_lagg_list_mtx)
   92 #define LAGG_LIST_LOCK(x)               mtx_lock(&V_lagg_list_mtx)
   93 #define LAGG_LIST_UNLOCK(x)             mtx_unlock(&V_lagg_list_mtx)
   94 eventhandler_tag        lagg_detach_cookie = NULL;
   95 
   96 static int      lagg_clone_create(struct if_clone *, int, caddr_t);
   97 static void     lagg_clone_destroy(struct ifnet *);
   98 static VNET_DEFINE(struct if_clone *, lagg_cloner);
   99 #define V_lagg_cloner   VNET(lagg_cloner)
  100 static const char laggname[] = "lagg";
  101 
  102 static void     lagg_capabilities(struct lagg_softc *);
  103 static int      lagg_port_create(struct lagg_softc *, struct ifnet *);
  104 static int      lagg_port_destroy(struct lagg_port *, int);
  105 static struct mbuf *lagg_input(struct ifnet *, struct mbuf *);
  106 static void     lagg_linkstate(struct lagg_softc *);
  107 static void     lagg_port_state(struct ifnet *, int);
  108 static int      lagg_port_ioctl(struct ifnet *, u_long, caddr_t);
  109 static int      lagg_port_output(struct ifnet *, struct mbuf *,
  110                     const struct sockaddr *, struct route *);
  111 static void     lagg_port_ifdetach(void *arg __unused, struct ifnet *);
  112 #ifdef LAGG_PORT_STACKING
  113 static int      lagg_port_checkstacking(struct lagg_softc *);
  114 #endif
  115 static void     lagg_port2req(struct lagg_port *, struct lagg_reqport *);
  116 static void     lagg_init(void *);
  117 static void     lagg_stop(struct lagg_softc *);
  118 static int      lagg_ioctl(struct ifnet *, u_long, caddr_t);
  119 static int      lagg_setmulti(struct lagg_port *);
  120 static int      lagg_clrmulti(struct lagg_port *);
  121 static  int     lagg_setcaps(struct lagg_port *, int cap);
  122 static  int     lagg_setflag(struct lagg_port *, int, int,
  123                     int (*func)(struct ifnet *, int));
  124 static  int     lagg_setflags(struct lagg_port *, int status);
  125 static uint64_t lagg_get_counter(struct ifnet *ifp, ift_counter cnt);
  126 static int      lagg_transmit(struct ifnet *, struct mbuf *);
  127 static void     lagg_qflush(struct ifnet *);
  128 static int      lagg_media_change(struct ifnet *);
  129 static void     lagg_media_status(struct ifnet *, struct ifmediareq *);
  130 static struct lagg_port *lagg_link_active(struct lagg_softc *,
  131             struct lagg_port *);
  132 
  133 /* Simple round robin */
  134 static void     lagg_rr_attach(struct lagg_softc *);
  135 static int      lagg_rr_start(struct lagg_softc *, struct mbuf *);
  136 static struct mbuf *lagg_rr_input(struct lagg_softc *, struct lagg_port *,
  137                     struct mbuf *);
  138 
  139 /* Active failover */
  140 static int      lagg_fail_start(struct lagg_softc *, struct mbuf *);
  141 static struct mbuf *lagg_fail_input(struct lagg_softc *, struct lagg_port *,
  142                     struct mbuf *);
  143 
  144 /* Load balancing */
  145 static void     lagg_lb_attach(struct lagg_softc *);
  146 static void     lagg_lb_detach(struct lagg_softc *);
  147 static int      lagg_lb_port_create(struct lagg_port *);
  148 static void     lagg_lb_port_destroy(struct lagg_port *);
  149 static int      lagg_lb_start(struct lagg_softc *, struct mbuf *);
  150 static struct mbuf *lagg_lb_input(struct lagg_softc *, struct lagg_port *,
  151                     struct mbuf *);
  152 static int      lagg_lb_porttable(struct lagg_softc *, struct lagg_port *);
  153 
  154 /* Broadcast */
  155 static int    lagg_bcast_start(struct lagg_softc *, struct mbuf *);
  156 static struct mbuf *lagg_bcast_input(struct lagg_softc *, struct lagg_port *,
  157                     struct mbuf *);
  158 
  159 /* 802.3ad LACP */
  160 static void     lagg_lacp_attach(struct lagg_softc *);
  161 static void     lagg_lacp_detach(struct lagg_softc *);
  162 static int      lagg_lacp_start(struct lagg_softc *, struct mbuf *);
  163 static struct mbuf *lagg_lacp_input(struct lagg_softc *, struct lagg_port *,
  164                     struct mbuf *);
  165 static void     lagg_lacp_lladdr(struct lagg_softc *);
  166 
  167 /* lagg protocol table */
  168 static const struct lagg_proto {
  169         lagg_proto      pr_num;
  170         void            (*pr_attach)(struct lagg_softc *);
  171         void            (*pr_detach)(struct lagg_softc *);
  172         int             (*pr_start)(struct lagg_softc *, struct mbuf *);
  173         struct mbuf *   (*pr_input)(struct lagg_softc *, struct lagg_port *,
  174                             struct mbuf *);
  175         int             (*pr_addport)(struct lagg_port *);
  176         void            (*pr_delport)(struct lagg_port *);
  177         void            (*pr_linkstate)(struct lagg_port *);
  178         void            (*pr_init)(struct lagg_softc *);
  179         void            (*pr_stop)(struct lagg_softc *);
  180         void            (*pr_lladdr)(struct lagg_softc *);
  181         void            (*pr_request)(struct lagg_softc *, void *);
  182         void            (*pr_portreq)(struct lagg_port *, void *);
  183 } lagg_protos[] = {
  184     {
  185         .pr_num = LAGG_PROTO_NONE
  186     },
  187     {
  188         .pr_num = LAGG_PROTO_ROUNDROBIN,
  189         .pr_attach = lagg_rr_attach,
  190         .pr_start = lagg_rr_start,
  191         .pr_input = lagg_rr_input,
  192     },
  193     {
  194         .pr_num = LAGG_PROTO_FAILOVER,
  195         .pr_start = lagg_fail_start,
  196         .pr_input = lagg_fail_input,
  197     },
  198     {
  199         .pr_num = LAGG_PROTO_LOADBALANCE,
  200         .pr_attach = lagg_lb_attach,
  201         .pr_detach = lagg_lb_detach,
  202         .pr_start = lagg_lb_start,
  203         .pr_input = lagg_lb_input,
  204         .pr_addport = lagg_lb_port_create,
  205         .pr_delport = lagg_lb_port_destroy,
  206     },
  207     {
  208         .pr_num = LAGG_PROTO_LACP,
  209         .pr_attach = lagg_lacp_attach,
  210         .pr_detach = lagg_lacp_detach,
  211         .pr_start = lagg_lacp_start,
  212         .pr_input = lagg_lacp_input,
  213         .pr_addport = lacp_port_create,
  214         .pr_delport = lacp_port_destroy,
  215         .pr_linkstate = lacp_linkstate,
  216         .pr_init = lacp_init,
  217         .pr_stop = lacp_stop,
  218         .pr_lladdr = lagg_lacp_lladdr,
  219         .pr_request = lacp_req,
  220         .pr_portreq = lacp_portreq,
  221     },
  222     {
  223         .pr_num = LAGG_PROTO_BROADCAST,
  224         .pr_start = lagg_bcast_start,
  225         .pr_input = lagg_bcast_input,
  226     },
  227 };
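      /*
       * The lagg_proto_*() wrappers below dispatch through this table
       * using sc->sc_proto as the index, so the entries must remain
       * ordered such that lagg_protos[pr].pr_num == pr.  Optional hooks
       * (pr_attach, pr_addport, pr_linkstate, ...) are NULL-checked
       * before use; pr_start and pr_input are invoked directly.
       */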
  228 
  229 SYSCTL_DECL(_net_link);
  230 SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0,
  231     "Link Aggregation");
  232 
  233 /* Allow input on any failover links */
  234 static VNET_DEFINE(int, lagg_failover_rx_all);
  235 #define V_lagg_failover_rx_all  VNET(lagg_failover_rx_all)
  236 SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW | CTLFLAG_VNET,
  237     &VNET_NAME(lagg_failover_rx_all), 0,
  238     "Accept input from any interface in a failover lagg");
  239 
  240 /* Default value for using flowid */
  241 static VNET_DEFINE(int, def_use_flowid) = 0;
  242 #define V_def_use_flowid        VNET(def_use_flowid)
  243 SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RWTUN,
  244     &VNET_NAME(def_use_flowid), 0,
  245     "Default setting for using flow id for load sharing");
  246 
  247 /* Default value for flowid shift */
  248 static VNET_DEFINE(int, def_flowid_shift) = 16;
  249 #define V_def_flowid_shift      VNET(def_flowid_shift)
  250 SYSCTL_INT(_net_link_lagg, OID_AUTO, default_flowid_shift, CTLFLAG_RWTUN,
  251     &VNET_NAME(def_flowid_shift), 0,
  252     "Default setting for flowid shift for load sharing");
  253 
  254 static void
  255 vnet_lagg_init(const void *unused __unused)
  256 {
  257 
  258         LAGG_LIST_LOCK_INIT();
  259         SLIST_INIT(&V_lagg_list);
  260         V_lagg_cloner = if_clone_simple(laggname, lagg_clone_create,
  261             lagg_clone_destroy, 0);
  262 }
  263 VNET_SYSINIT(vnet_lagg_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
  264     vnet_lagg_init, NULL);
  265 
  266 static void
  267 vnet_lagg_uninit(const void *unused __unused)
  268 {
  269 
  270         if_clone_detach(V_lagg_cloner);
  271         LAGG_LIST_LOCK_DESTROY();
  272 }
  273 VNET_SYSUNINIT(vnet_lagg_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY,
  274     vnet_lagg_uninit, NULL);
  275 
  276 static int
  277 lagg_modevent(module_t mod, int type, void *data)
  278 {
  279 
  280         switch (type) {
  281         case MOD_LOAD:
  282                 lagg_input_p = lagg_input;
  283                 lagg_linkstate_p = lagg_port_state;
  284                 lagg_detach_cookie = EVENTHANDLER_REGISTER(
  285                     ifnet_departure_event, lagg_port_ifdetach, NULL,
  286                     EVENTHANDLER_PRI_ANY);
  287                 break;
  288         case MOD_UNLOAD:
  289                 EVENTHANDLER_DEREGISTER(ifnet_departure_event,
  290                     lagg_detach_cookie);
  291                 lagg_input_p = NULL;
  292                 lagg_linkstate_p = NULL;
  293                 break;
  294         default:
  295                 return (EOPNOTSUPP);
  296         }
  297         return (0);
  298 }
  299 
  300 static moduledata_t lagg_mod = {
  301         "if_lagg",
  302         lagg_modevent,
  303         0
  304 };
  305 
  306 DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
  307 MODULE_VERSION(if_lagg, 1);
  308 
  309 static void
  310 lagg_proto_attach(struct lagg_softc *sc, lagg_proto pr)
  311 {
  312 
  313         LAGG_XLOCK_ASSERT(sc);
  314         KASSERT(sc->sc_proto == LAGG_PROTO_NONE, ("%s: sc %p has proto",
  315             __func__, sc));
  316 
  317         if (sc->sc_ifflags & IFF_DEBUG)
  318                 if_printf(sc->sc_ifp, "using proto %u\n", pr);
  319 
  320         if (lagg_protos[pr].pr_attach != NULL)
  321                 lagg_protos[pr].pr_attach(sc);
  322         sc->sc_proto = pr;
  323 }
  324 
  325 static void
  326 lagg_proto_detach(struct lagg_softc *sc)
  327 {
  328         lagg_proto pr;
  329 
  330         LAGG_XLOCK_ASSERT(sc);
  331         LAGG_WLOCK_ASSERT(sc);
  332         pr = sc->sc_proto;
  333         sc->sc_proto = LAGG_PROTO_NONE;
  334 
  335         if (lagg_protos[pr].pr_detach != NULL)
  336                 lagg_protos[pr].pr_detach(sc);
  337         else
  338                 LAGG_WUNLOCK(sc);
  339 }
  340 
  341 static int
  342 lagg_proto_start(struct lagg_softc *sc, struct mbuf *m)
  343 {
  344 
  345         return (lagg_protos[sc->sc_proto].pr_start(sc, m));
  346 }
  347 
  348 static struct mbuf *
  349 lagg_proto_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
  350 {
  351 
  352         return (lagg_protos[sc->sc_proto].pr_input(sc, lp, m));
  353 }
  354 
  355 static int
  356 lagg_proto_addport(struct lagg_softc *sc, struct lagg_port *lp)
  357 {
  358 
  359         if (lagg_protos[sc->sc_proto].pr_addport == NULL)
  360                 return (0);
  361         else
  362                 return (lagg_protos[sc->sc_proto].pr_addport(lp));
  363 }
  364 
  365 static void
  366 lagg_proto_delport(struct lagg_softc *sc, struct lagg_port *lp)
  367 {
  368 
  369         if (lagg_protos[sc->sc_proto].pr_delport != NULL)
  370                 lagg_protos[sc->sc_proto].pr_delport(lp);
  371 }
  372 
  373 static void
  374 lagg_proto_linkstate(struct lagg_softc *sc, struct lagg_port *lp)
  375 {
  376 
  377         if (lagg_protos[sc->sc_proto].pr_linkstate != NULL)
  378                 lagg_protos[sc->sc_proto].pr_linkstate(lp);
  379 }
  380 
  381 static void
  382 lagg_proto_init(struct lagg_softc *sc)
  383 {
  384 
  385         if (lagg_protos[sc->sc_proto].pr_init != NULL)
  386                 lagg_protos[sc->sc_proto].pr_init(sc);
  387 }
  388 
  389 static void
  390 lagg_proto_stop(struct lagg_softc *sc)
  391 {
  392 
  393         if (lagg_protos[sc->sc_proto].pr_stop != NULL)
  394                 lagg_protos[sc->sc_proto].pr_stop(sc);
  395 }
  396 
  397 static void
  398 lagg_proto_lladdr(struct lagg_softc *sc)
  399 {
  400 
  401         if (lagg_protos[sc->sc_proto].pr_lladdr != NULL)
  402                 lagg_protos[sc->sc_proto].pr_lladdr(sc);
  403 }
  404 
  405 static void
  406 lagg_proto_request(struct lagg_softc *sc, void *v)
  407 {
  408 
  409         if (lagg_protos[sc->sc_proto].pr_request != NULL)
  410                 lagg_protos[sc->sc_proto].pr_request(sc, v);
  411 }
  412 
  413 static void
  414 lagg_proto_portreq(struct lagg_softc *sc, struct lagg_port *lp, void *v)
  415 {
  416 
  417         if (lagg_protos[sc->sc_proto].pr_portreq != NULL)
  418                 lagg_protos[sc->sc_proto].pr_portreq(lp, v);
  419 }
  420 
  421 /*
  422  * This routine is run via a vlan
  423  * config EVENT.
  424  */
  425 static void
  426 lagg_register_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
  427 {
  428         struct lagg_softc *sc = ifp->if_softc;
  429         struct lagg_port *lp;
  430 
  431         if (ifp->if_softc !=  arg)   /* Not our event */
  432                 return;
  433 
  434         LAGG_SLOCK(sc);
  435         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
  436                 EVENTHANDLER_INVOKE(vlan_config, lp->lp_ifp, vtag);
  437         LAGG_SUNLOCK(sc);
  438 }
  439 
  440 /*
  441  * This routine is run via a vlan
  442  * unconfig EVENT.
  443  */
  444 static void
  445 lagg_unregister_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
  446 {
  447         struct lagg_softc *sc = ifp->if_softc;
  448         struct lagg_port *lp;
  449 
  450         if (ifp->if_softc !=  arg)   /* Not our event */
  451                 return;
  452 
  453         LAGG_SLOCK(sc);
  454         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
  455                 EVENTHANDLER_INVOKE(vlan_unconfig, lp->lp_ifp, vtag);
  456         LAGG_SUNLOCK(sc);
  457 }
  458 
  459 static int
  460 lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
  461 {
  462         struct lagg_softc *sc;
  463         struct ifnet *ifp;
  464         static const u_char eaddr[6];   /* 00:00:00:00:00:00 */
  465 
  466         sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
  467         ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
  468         if (ifp == NULL) {
  469                 free(sc, M_DEVBUF);
  470                 return (ENOSPC);
  471         }
  472         LAGG_LOCK_INIT(sc);
  473         LAGG_SX_INIT(sc);
  474 
  475         LAGG_XLOCK(sc);
  476         if (V_def_use_flowid)
  477                 sc->sc_opts |= LAGG_OPT_USE_FLOWID;
  478         sc->flowid_shift = V_def_flowid_shift;
  479 
  480         /* Hash all layers by default */
  481         sc->sc_flags = MBUF_HASHFLAG_L2|MBUF_HASHFLAG_L3|MBUF_HASHFLAG_L4;
  482 
  483         lagg_proto_attach(sc, LAGG_PROTO_DEFAULT);
  484 
  485         SLIST_INIT(&sc->sc_ports);
  486 
  487         /* Initialise pseudo media types */
  488         ifmedia_init(&sc->sc_media, 0, lagg_media_change,
  489             lagg_media_status);
  490         ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
  491         ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
  492 
  493         if_initname(ifp, laggname, unit);
  494         ifp->if_softc = sc;
  495         ifp->if_transmit = lagg_transmit;
  496         ifp->if_qflush = lagg_qflush;
  497         ifp->if_init = lagg_init;
  498         ifp->if_ioctl = lagg_ioctl;
  499         ifp->if_get_counter = lagg_get_counter;
  500         ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
  501         ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS;
  502 
   503         /*
   504          * Attach as an ordinary ethernet device; children will be
   505          * attached as the special device type IFT_IEEE8023ADLAG.
   506          */
  507         ether_ifattach(ifp, eaddr);
  508 
  509         sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
  510                 lagg_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
  511         sc->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
  512                 lagg_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
  513 
  514         /* Insert into the global list of laggs */
  515         LAGG_LIST_LOCK();
  516         SLIST_INSERT_HEAD(&V_lagg_list, sc, sc_entries);
  517         LAGG_LIST_UNLOCK();
  518         LAGG_XUNLOCK(sc);
  519 
  520         return (0);
  521 }
  522 
  523 static void
  524 lagg_clone_destroy(struct ifnet *ifp)
  525 {
  526         struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
  527         struct lagg_port *lp;
  528 
  529         LAGG_XLOCK(sc);
  530         sc->sc_destroying = 1;
  531         lagg_stop(sc);
  532         ifp->if_flags &= ~IFF_UP;
  533 
  534         EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach);
  535         EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach);
  536 
  537         /* Shutdown and remove lagg ports */
  538         while ((lp = SLIST_FIRST(&sc->sc_ports)) != NULL)
  539                 lagg_port_destroy(lp, 1);
  540 
  541         /* Unhook the aggregation protocol */
  542         LAGG_WLOCK(sc);
  543         lagg_proto_detach(sc);
  544         LAGG_UNLOCK_ASSERT(sc);
  545         LAGG_XUNLOCK(sc);
  546 
  547         ifmedia_removeall(&sc->sc_media);
  548         ether_ifdetach(ifp);
  549         if_free(ifp);
  550 
  551         LAGG_LIST_LOCK();
  552         SLIST_REMOVE(&V_lagg_list, sc, lagg_softc, sc_entries);
  553         LAGG_LIST_UNLOCK();
  554 
  555         LAGG_SX_DESTROY(sc);
  556         LAGG_LOCK_DESTROY(sc);
  557         free(sc, M_DEVBUF);
  558 }
  559 
  560 static void
  561 lagg_capabilities(struct lagg_softc *sc)
  562 {
  563         struct lagg_port *lp;
  564         int cap, ena, pena;
  565         uint64_t hwa;
  566         struct ifnet_hw_tsomax hw_tsomax;
  567 
  568         LAGG_XLOCK_ASSERT(sc);
  569 
  570         /* Get common enabled capabilities for the lagg ports */
  571         ena = ~0;
  572         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
  573                 ena &= lp->lp_ifp->if_capenable;
  574         ena = (ena == ~0 ? 0 : ena);
  575 
  576         /*
  577          * Apply common enabled capabilities back to the lagg ports.
  578          * May require several iterations if they are dependent.
  579          */
  580         do {
  581                 pena = ena;
  582                 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
  583                         lagg_setcaps(lp, ena);
  584                         ena &= lp->lp_ifp->if_capenable;
  585                 }
  586         } while (pena != ena);
  587 
  588         /* Get other capabilities from the lagg ports */
  589         cap = ~0;
  590         hwa = ~(uint64_t)0;
  591         memset(&hw_tsomax, 0, sizeof(hw_tsomax));
  592         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
  593                 cap &= lp->lp_ifp->if_capabilities;
  594                 hwa &= lp->lp_ifp->if_hwassist;
  595                 if_hw_tsomax_common(lp->lp_ifp, &hw_tsomax);
  596         }
  597         cap = (cap == ~0 ? 0 : cap);
  598         hwa = (hwa == ~(uint64_t)0 ? 0 : hwa);
  599 
  600         if (sc->sc_ifp->if_capabilities != cap ||
  601             sc->sc_ifp->if_capenable != ena ||
  602             sc->sc_ifp->if_hwassist != hwa ||
  603             if_hw_tsomax_update(sc->sc_ifp, &hw_tsomax) != 0) {
  604                 sc->sc_ifp->if_capabilities = cap;
  605                 sc->sc_ifp->if_capenable = ena;
  606                 sc->sc_ifp->if_hwassist = hwa;
  607                 getmicrotime(&sc->sc_ifp->if_lastchange);
  608 
  609                 if (sc->sc_ifflags & IFF_DEBUG)
  610                         if_printf(sc->sc_ifp,
  611                             "capabilities 0x%08x enabled 0x%08x\n", cap, ena);
  612         }
  613 }
  614 
  615 static int
  616 lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
  617 {
  618         struct lagg_softc *sc_ptr;
  619         struct lagg_port *lp, *tlp;
  620         struct ifreq ifr;
  621         int error, i, oldmtu;
  622         uint64_t *pval;
  623 
  624         LAGG_XLOCK_ASSERT(sc);
  625 
  626         if (sc->sc_ifp == ifp) {
  627                 if_printf(sc->sc_ifp,
  628                     "cannot add a lagg to itself as a port\n");
  629                 return (EINVAL);
  630         }
  631 
   632         /* Limit the maximum number of lagg ports */
  633         if (sc->sc_count >= LAGG_MAX_PORTS)
  634                 return (ENOSPC);
  635 
   636         /* Check if the port is already associated with a lagg */
  637         if (ifp->if_lagg != NULL) {
  638                 /* Port is already in the current lagg? */
  639                 lp = (struct lagg_port *)ifp->if_lagg;
  640                 if (lp->lp_softc == sc)
  641                         return (EEXIST);
  642                 return (EBUSY);
  643         }
  644 
  645         /* XXX Disallow non-ethernet interfaces (this should be any of 802) */
  646         if (ifp->if_type != IFT_ETHER && ifp->if_type != IFT_L2VLAN)
  647                 return (EPROTONOSUPPORT);
  648 
  649         /* Allow the first Ethernet member to define the MTU */
  650         oldmtu = -1;
  651         if (SLIST_EMPTY(&sc->sc_ports)) {
  652                 sc->sc_ifp->if_mtu = ifp->if_mtu;
  653         } else if (sc->sc_ifp->if_mtu != ifp->if_mtu) {
  654                 if (ifp->if_ioctl == NULL) {
  655                         if_printf(sc->sc_ifp, "cannot change MTU for %s\n",
  656                             ifp->if_xname);
  657                         return (EINVAL);
  658                 }
  659                 oldmtu = ifp->if_mtu;
  660                 strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name));
  661                 ifr.ifr_mtu = sc->sc_ifp->if_mtu;
  662                 error = (*ifp->if_ioctl)(ifp, SIOCSIFMTU, (caddr_t)&ifr);
  663                 if (error != 0) {
  664                         if_printf(sc->sc_ifp, "invalid MTU for %s\n",
  665                             ifp->if_xname);
  666                         return (error);
  667                 }
  668                 ifr.ifr_mtu = oldmtu;
  669         }
  670 
  671         lp = malloc(sizeof(struct lagg_port), M_DEVBUF, M_WAITOK|M_ZERO);
  672         lp->lp_softc = sc;
  673 
  674         /* Check if port is a stacked lagg */
  675         LAGG_LIST_LOCK();
  676         SLIST_FOREACH(sc_ptr, &V_lagg_list, sc_entries) {
  677                 if (ifp == sc_ptr->sc_ifp) {
  678                         LAGG_LIST_UNLOCK();
  679                         free(lp, M_DEVBUF);
  680                         if (oldmtu != -1)
  681                                 (*ifp->if_ioctl)(ifp, SIOCSIFMTU,
  682                                     (caddr_t)&ifr);
  683                         return (EINVAL);
   684                         /* XXX disable stacking for the moment, it's untested */
  685 #ifdef LAGG_PORT_STACKING
  686                         lp->lp_flags |= LAGG_PORT_STACK;
  687                         if (lagg_port_checkstacking(sc_ptr) >=
  688                             LAGG_MAX_STACKING) {
  689                                 LAGG_LIST_UNLOCK();
  690                                 free(lp, M_DEVBUF);
  691                                 if (oldmtu != -1)
  692                                         (*ifp->if_ioctl)(ifp, SIOCSIFMTU,
  693                                             (caddr_t)&ifr);
  694                                 return (E2BIG);
  695                         }
  696 #endif
  697                 }
  698         }
  699         LAGG_LIST_UNLOCK();
  700 
  701         if_ref(ifp);
  702         lp->lp_ifp = ifp;
  703 
  704         bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ETHER_ADDR_LEN);
  705         lp->lp_ifcapenable = ifp->if_capenable;
  706         if (SLIST_EMPTY(&sc->sc_ports)) {
  707                 LAGG_WLOCK(sc);
  708                 bcopy(IF_LLADDR(ifp), IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
  709                 lagg_proto_lladdr(sc);
  710                 LAGG_WUNLOCK(sc);
  711                 EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
  712         } else {
  713                 if_setlladdr(ifp, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
  714         }
  715         lagg_setflags(lp, 1);
  716 
  717         LAGG_WLOCK(sc);
  718         if (SLIST_EMPTY(&sc->sc_ports))
  719                 sc->sc_primary = lp;
  720 
  721         /* Change the interface type */
  722         lp->lp_iftype = ifp->if_type;
  723         ifp->if_type = IFT_IEEE8023ADLAG;
  724         ifp->if_lagg = lp;
  725         lp->lp_ioctl = ifp->if_ioctl;
  726         ifp->if_ioctl = lagg_port_ioctl;
  727         lp->lp_output = ifp->if_output;
  728         ifp->if_output = lagg_port_output;
  729 
  730         /* Read port counters */
  731         pval = lp->port_counters.val;
  732         for (i = 0; i < IFCOUNTERS; i++, pval++)
  733                 *pval = ifp->if_get_counter(ifp, i);
  734 
   735         /*
   736          * Insert into the list of ports.
   737          * Keep ports sorted by if_index so that configuration is
   738          * predictable and the same `ifconfig laggN create ...` command
   739          * leads to the same result each time.
   740          */
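              /*
               * For example, with member ports of if_index 2 and 5 already
               * on the list, adding a port with if_index 3 stops the loop
               * below at the index-2 entry, and SLIST_INSERT_AFTER() places
               * the new port between the two, keeping the list sorted.
               */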
  741         SLIST_FOREACH(tlp, &sc->sc_ports, lp_entries) {
  742                 if (tlp->lp_ifp->if_index < ifp->if_index && (
  743                     SLIST_NEXT(tlp, lp_entries) == NULL ||
  744                     SLIST_NEXT(tlp, lp_entries)->lp_ifp->if_index >
  745                     ifp->if_index))
  746                         break;
  747         }
  748         if (tlp != NULL)
  749                 SLIST_INSERT_AFTER(tlp, lp, lp_entries);
  750         else
  751                 SLIST_INSERT_HEAD(&sc->sc_ports, lp, lp_entries);
  752         sc->sc_count++;
  753 
  754         lagg_setmulti(lp);
  755 
  756         if ((error = lagg_proto_addport(sc, lp)) != 0) {
  757                 /* Remove the port, without calling pr_delport. */
  758                 lagg_port_destroy(lp, 0);
  759                 if (oldmtu != -1)
  760                         (*ifp->if_ioctl)(ifp, SIOCSIFMTU, (caddr_t)&ifr);
  761                 LAGG_UNLOCK_ASSERT(sc);
  762                 return (error);
  763         }
  764 
  765         LAGG_WUNLOCK(sc);
  766 
  767         /* Update lagg capabilities */
  768         lagg_capabilities(sc);
  769         lagg_linkstate(sc);
  770 
  771         return (0);
  772 }
  773 
  774 #ifdef LAGG_PORT_STACKING
  775 static int
  776 lagg_port_checkstacking(struct lagg_softc *sc)
  777 {
  778         struct lagg_softc *sc_ptr;
  779         struct lagg_port *lp;
  780         int m = 0;
  781 
  782         LAGG_SXLOCK_ASSERT(sc);
  783         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
  784                 if (lp->lp_flags & LAGG_PORT_STACK) {
  785                         sc_ptr = (struct lagg_softc *)lp->lp_ifp->if_softc;
  786                         m = MAX(m, lagg_port_checkstacking(sc_ptr));
  787                 }
  788         }
  789 
  790         return (m + 1);
  791 }
  792 #endif
  793 
  794 static int
  795 lagg_port_destroy(struct lagg_port *lp, int rundelport)
  796 {
  797         struct lagg_softc *sc = lp->lp_softc;
  798         struct lagg_port *lp_ptr, *lp0;
  799         struct ifnet *ifp = lp->lp_ifp;
  800         uint64_t *pval, vdiff;
  801         int i;
  802 
  803         LAGG_XLOCK_ASSERT(sc);
  804 
  805         if (rundelport) {
  806                 LAGG_WLOCK(sc);
  807                 lagg_proto_delport(sc, lp);
  808         } else
  809                 LAGG_WLOCK_ASSERT(sc);
  810 
  811         if (lp->lp_detaching == 0)
  812                 lagg_clrmulti(lp);
  813 
  814         /* Restore interface */
  815         ifp->if_type = lp->lp_iftype;
  816         ifp->if_ioctl = lp->lp_ioctl;
  817         ifp->if_output = lp->lp_output;
  818         ifp->if_lagg = NULL;
  819 
  820         /* Update detached port counters */
  821         pval = lp->port_counters.val;
  822         for (i = 0; i < IFCOUNTERS; i++, pval++) {
  823                 vdiff = ifp->if_get_counter(ifp, i) - *pval;
  824                 sc->detached_counters.val[i] += vdiff;
  825         }
  826 
  827         /* Finally, remove the port from the lagg */
  828         SLIST_REMOVE(&sc->sc_ports, lp, lagg_port, lp_entries);
  829         sc->sc_count--;
  830 
  831         /* Update the primary interface */
  832         if (lp == sc->sc_primary) {
  833                 uint8_t lladdr[ETHER_ADDR_LEN];
  834 
  835                 if ((lp0 = SLIST_FIRST(&sc->sc_ports)) == NULL)
  836                         bzero(&lladdr, ETHER_ADDR_LEN);
  837                 else
  838                         bcopy(lp0->lp_lladdr, lladdr, ETHER_ADDR_LEN);
  839                 sc->sc_primary = lp0;
  840                 if (sc->sc_destroying == 0) {
  841                         bcopy(lladdr, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
  842                         lagg_proto_lladdr(sc);
  843                         LAGG_WUNLOCK(sc);
  844                         EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
  845                 } else
  846                         LAGG_WUNLOCK(sc);
  847 
  848                 /*
  849                  * Update lladdr for each port (new primary needs update
  850                  * as well, to switch from old lladdr to its 'real' one)
  851                  */
  852                 SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries)
  853                         if_setlladdr(lp_ptr->lp_ifp, lladdr, ETHER_ADDR_LEN);
  854         } else
  855                 LAGG_WUNLOCK(sc);
  856 
  857         if (lp->lp_ifflags)
  858                 if_printf(ifp, "%s: lp_ifflags unclean\n", __func__);
  859 
  860         if (lp->lp_detaching == 0) {
  861                 lagg_setflags(lp, 0);
  862                 lagg_setcaps(lp, lp->lp_ifcapenable);
  863                 if_setlladdr(ifp, lp->lp_lladdr, ETHER_ADDR_LEN);
  864         }
  865 
  866         if_rele(ifp);
  867         free(lp, M_DEVBUF);
  868 
  869         /* Update lagg capabilities */
  870         lagg_capabilities(sc);
  871         lagg_linkstate(sc);
  872 
  873         return (0);
  874 }
  875 
  876 static int
  877 lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
  878 {
  879         struct lagg_reqport *rp = (struct lagg_reqport *)data;
  880         struct lagg_softc *sc;
  881         struct lagg_port *lp = NULL;
  882         int error = 0;
  883 
  884         /* Should be checked by the caller */
  885         if (ifp->if_type != IFT_IEEE8023ADLAG ||
  886             (lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL)
  887                 goto fallback;
  888 
  889         switch (cmd) {
  890         case SIOCGLAGGPORT:
  891                 if (rp->rp_portname[0] == '\0' ||
  892                     ifunit(rp->rp_portname) != ifp) {
  893                         error = EINVAL;
  894                         break;
  895                 }
  896 
  897                 LAGG_SLOCK(sc);
  898                 if ((lp = ifp->if_lagg) == NULL || lp->lp_softc != sc) {
  899                         error = ENOENT;
  900                         LAGG_SUNLOCK(sc);
  901                         break;
  902                 }
  903 
  904                 lagg_port2req(lp, rp);
  905                 LAGG_SUNLOCK(sc);
  906                 break;
  907 
  908         case SIOCSIFCAP:
  909                 if (lp->lp_ioctl == NULL) {
  910                         error = EINVAL;
  911                         break;
  912                 }
  913                 error = (*lp->lp_ioctl)(ifp, cmd, data);
  914                 if (error)
  915                         break;
  916 
  917                 /* Update lagg interface capabilities */
  918                 LAGG_XLOCK(sc);
  919                 lagg_capabilities(sc);
  920                 LAGG_XUNLOCK(sc);
  921                 VLAN_CAPABILITIES(sc->sc_ifp);
  922                 break;
  923 
  924         case SIOCSIFMTU:
  925                 /* Do not allow the MTU to be changed once joined */
  926                 error = EINVAL;
  927                 break;
  928 
  929         default:
  930                 goto fallback;
  931         }
  932 
  933         return (error);
  934 
  935 fallback:
  936         if (lp != NULL && lp->lp_ioctl != NULL)
  937                 return ((*lp->lp_ioctl)(ifp, cmd, data));
  938 
  939         return (EINVAL);
  940 }
  941 
  942 /*
  943  * Requests counter @cnt data.
  944  *
  945  * The counter value is calculated the following way:
  946  * 1) for each port, sum the difference between current and "initial" measurements.
  947  * 2) add the lagg logical interface counters.
  948  * 3) add data from the detached_counters array.
  949  *
  950  * We also do the following on port attach/detach:
  951  * 1) On port attach we store all of its counters in the port_counters array.
  952  * 2) On port detach we add the difference between the "initial" and
  953  *    current counter data to the detached_counters array.
  954  */
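      /*
       * A worked example with hypothetical numbers: a port that joined
       * with if_get_counter(ifp, IFCOUNTER_IPACKETS) == 1000 and now
       * reports 1500 contributes 500; with
       * detached_counters.val[IFCOUNTER_IPACKETS] == 200 and 10 packets
       * counted on the logical interface itself, the result is
       * 500 + 10 + 200 = 710.
       */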
  955 static uint64_t
  956 lagg_get_counter(struct ifnet *ifp, ift_counter cnt)
  957 {
  958         struct lagg_softc *sc;
  959         struct lagg_port *lp;
  960         struct ifnet *lpifp;
  961         struct rm_priotracker tracker;
  962         uint64_t newval, oldval, vsum;
  963 
  964         /* Revise this when we've got non-generic counters. */
  965         KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
  966 
  967         sc = (struct lagg_softc *)ifp->if_softc;
  968         LAGG_RLOCK(sc, &tracker);
  969 
  970         vsum = 0;
  971         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
   972                 /* Value saved at port attach time */
   973                 oldval = lp->port_counters.val[cnt];
   974                 /* Current value */
   975                 lpifp = lp->lp_ifp;
   976                 newval = lpifp->if_get_counter(lpifp, cnt);
   977                 /* Add the difference to the sum */
  978                 vsum += newval - oldval;
  979         }
  980 
   981         /*
   982          * Add counter data which might be added by upper
   983          * layer protocols operating on the logical interface.
   984          */
   985         vsum += if_get_counter_default(ifp, cnt);
   986 
   987         /*
   988          * Add counter data from the detached port counters.
   989          */
   990         vsum += sc->detached_counters.val[cnt];
  991 
  992         LAGG_RUNLOCK(sc, &tracker);
  993 
  994         return (vsum);
  995 }
  996 
  997 /*
  998  * For direct output to child ports.
  999  */
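      /*
       * Only raw output reaches a member port this way, e.g. bpf(4)
       * writes, which use AF_UNSPEC or pseudo_AF_HDRCMPLT; all other
       * frames are dropped below so that regular traffic cannot bypass
       * the lagg interface.
       */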
 1000 static int
 1001 lagg_port_output(struct ifnet *ifp, struct mbuf *m,
 1002         const struct sockaddr *dst, struct route *ro)
 1003 {
 1004         struct lagg_port *lp = ifp->if_lagg;
 1005 
 1006         switch (dst->sa_family) {
 1007                 case pseudo_AF_HDRCMPLT:
 1008                 case AF_UNSPEC:
 1009                         return ((*lp->lp_output)(ifp, m, dst, ro));
 1010         }
 1011 
 1012         /* drop any other frames */
 1013         m_freem(m);
 1014         return (ENETDOWN);
 1015 }
 1016 
 1017 static void
 1018 lagg_port_ifdetach(void *arg __unused, struct ifnet *ifp)
 1019 {
 1020         struct lagg_port *lp;
 1021         struct lagg_softc *sc;
 1022 
 1023         if ((lp = ifp->if_lagg) == NULL)
 1024                 return;
 1025         /* If the ifnet is just being renamed, don't do anything. */
 1026         if (ifp->if_flags & IFF_RENAMING)
 1027                 return;
 1028 
 1029         sc = lp->lp_softc;
 1030 
 1031         LAGG_XLOCK(sc);
 1032         lp->lp_detaching = 1;
 1033         lagg_port_destroy(lp, 1);
 1034         LAGG_XUNLOCK(sc);
 1035         VLAN_CAPABILITIES(sc->sc_ifp);
 1036 }
 1037 
 1038 static void
 1039 lagg_port2req(struct lagg_port *lp, struct lagg_reqport *rp)
 1040 {
 1041         struct lagg_softc *sc = lp->lp_softc;
 1042 
 1043         strlcpy(rp->rp_ifname, sc->sc_ifname, sizeof(rp->rp_ifname));
 1044         strlcpy(rp->rp_portname, lp->lp_ifp->if_xname, sizeof(rp->rp_portname));
 1045         rp->rp_prio = lp->lp_prio;
 1046         rp->rp_flags = lp->lp_flags;
 1047         lagg_proto_portreq(sc, lp, &rp->rp_psc);
 1048 
 1049         /* Add protocol specific flags */
 1050         switch (sc->sc_proto) {
 1051                 case LAGG_PROTO_FAILOVER:
 1052                         if (lp == sc->sc_primary)
 1053                                 rp->rp_flags |= LAGG_PORT_MASTER;
 1054                         if (lp == lagg_link_active(sc, sc->sc_primary))
 1055                                 rp->rp_flags |= LAGG_PORT_ACTIVE;
 1056                         break;
 1057 
 1058                 case LAGG_PROTO_ROUNDROBIN:
 1059                 case LAGG_PROTO_LOADBALANCE:
 1060                 case LAGG_PROTO_BROADCAST:
 1061                         if (LAGG_PORTACTIVE(lp))
 1062                                 rp->rp_flags |= LAGG_PORT_ACTIVE;
 1063                         break;
 1064 
 1065                 case LAGG_PROTO_LACP:
 1066                         /* LACP has a different definition of active */
 1067                         if (lacp_isactive(lp))
 1068                                 rp->rp_flags |= LAGG_PORT_ACTIVE;
 1069                         if (lacp_iscollecting(lp))
 1070                                 rp->rp_flags |= LAGG_PORT_COLLECTING;
 1071                         if (lacp_isdistributing(lp))
 1072                                 rp->rp_flags |= LAGG_PORT_DISTRIBUTING;
 1073                         break;
 1074         }
 1075 
 1076 }
 1077 
 1078 static void
 1079 lagg_init(void *xsc)
 1080 {
 1081         struct lagg_softc *sc = (struct lagg_softc *)xsc;
 1082         struct ifnet *ifp = sc->sc_ifp;
 1083         struct lagg_port *lp;
 1084 
 1085         LAGG_XLOCK(sc);
 1086         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 1087                 LAGG_XUNLOCK(sc);
 1088                 return;
 1089         }
 1090 
 1091         ifp->if_drv_flags |= IFF_DRV_RUNNING;
 1092 
  1093         /*
  1094          * Update the port lladdrs if needed.
  1095          * We may have been called as an if_setlladdr()
  1096          * notification that the lladdr has changed.
  1097          */
 1098         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1099                 if (memcmp(IF_LLADDR(ifp), IF_LLADDR(lp->lp_ifp),
 1100                     ETHER_ADDR_LEN) != 0)
 1101                         if_setlladdr(lp->lp_ifp, IF_LLADDR(ifp), ETHER_ADDR_LEN);
 1102         }
 1103 
 1104         lagg_proto_init(sc);
 1105 
 1106         LAGG_XUNLOCK(sc);
 1107 }
 1108 
 1109 static void
 1110 lagg_stop(struct lagg_softc *sc)
 1111 {
 1112         struct ifnet *ifp = sc->sc_ifp;
 1113 
 1114         LAGG_XLOCK_ASSERT(sc);
 1115 
 1116         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 1117                 return;
 1118 
 1119         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 1120 
 1121         lagg_proto_stop(sc);
 1122 }
 1123 
 1124 static int
 1125 lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 1126 {
 1127         struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 1128         struct lagg_reqall *ra = (struct lagg_reqall *)data;
 1129         struct lagg_reqopts *ro = (struct lagg_reqopts *)data;
 1130         struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf;
 1131         struct lagg_reqflags *rf = (struct lagg_reqflags *)data;
 1132         struct ifreq *ifr = (struct ifreq *)data;
 1133         struct lagg_port *lp;
 1134         struct ifnet *tpif;
 1135         struct thread *td = curthread;
 1136         char *buf, *outbuf;
 1137         int count, buflen, len, error = 0;
 1138 
 1139         bzero(&rpbuf, sizeof(rpbuf));
 1140 
 1141         switch (cmd) {
 1142         case SIOCGLAGG:
 1143                 LAGG_SLOCK(sc);
 1144                 buflen = sc->sc_count * sizeof(struct lagg_reqport);
 1145                 outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
 1146                 ra->ra_proto = sc->sc_proto;
 1147                 lagg_proto_request(sc, &ra->ra_psc);
 1148                 count = 0;
 1149                 buf = outbuf;
 1150                 len = min(ra->ra_size, buflen);
 1151                 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1152                         if (len < sizeof(rpbuf))
 1153                                 break;
 1154 
 1155                         lagg_port2req(lp, &rpbuf);
 1156                         memcpy(buf, &rpbuf, sizeof(rpbuf));
 1157                         count++;
 1158                         buf += sizeof(rpbuf);
 1159                         len -= sizeof(rpbuf);
 1160                 }
 1161                 LAGG_SUNLOCK(sc);
 1162                 ra->ra_ports = count;
 1163                 ra->ra_size = count * sizeof(rpbuf);
 1164                 error = copyout(outbuf, ra->ra_port, ra->ra_size);
 1165                 free(outbuf, M_TEMP);
 1166                 break;
 1167         case SIOCSLAGG:
 1168                 error = priv_check(td, PRIV_NET_LAGG);
 1169                 if (error)
 1170                         break;
 1171                 if (ra->ra_proto >= LAGG_PROTO_MAX) {
 1172                         error = EPROTONOSUPPORT;
 1173                         break;
 1174                 }
 1175 
 1176                 LAGG_XLOCK(sc);
 1177                 LAGG_WLOCK(sc);
 1178                 lagg_proto_detach(sc);
 1179                 LAGG_UNLOCK_ASSERT(sc);
 1180                 lagg_proto_attach(sc, ra->ra_proto);
 1181                 LAGG_XUNLOCK(sc);
 1182                 break;
 1183         case SIOCGLAGGOPTS:
 1184                 LAGG_SLOCK(sc);
 1185                 ro->ro_opts = sc->sc_opts;
 1186                 if (sc->sc_proto == LAGG_PROTO_LACP) {
 1187                         struct lacp_softc *lsc;
 1188 
 1189                         lsc = (struct lacp_softc *)sc->sc_psc;
 1190                         if (lsc->lsc_debug.lsc_tx_test != 0)
 1191                                 ro->ro_opts |= LAGG_OPT_LACP_TXTEST;
 1192                         if (lsc->lsc_debug.lsc_rx_test != 0)
 1193                                 ro->ro_opts |= LAGG_OPT_LACP_RXTEST;
 1194                         if (lsc->lsc_strict_mode != 0)
 1195                                 ro->ro_opts |= LAGG_OPT_LACP_STRICT;
 1196                         if (lsc->lsc_fast_timeout != 0)
 1197                                 ro->ro_opts |= LAGG_OPT_LACP_FAST_TIMO;
 1198 
 1199                         ro->ro_active = sc->sc_active;
 1200                 } else {
 1201                         ro->ro_active = 0;
 1202                         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 1203                                 ro->ro_active += LAGG_PORTACTIVE(lp);
 1204                 }
 1205                 ro->ro_bkt = sc->sc_bkt;
 1206                 ro->ro_flapping = sc->sc_flapping;
 1207                 ro->ro_flowid_shift = sc->flowid_shift;
 1208                 LAGG_SUNLOCK(sc);
 1209                 break;
 1210         case SIOCSLAGGOPTS:
 1211                 if (sc->sc_proto == LAGG_PROTO_ROUNDROBIN) {
 1212                         if (ro->ro_bkt == 0)
 1213                                 sc->sc_bkt = 1; /* Minimum 1 packet per iface. */
 1214                         else
 1215                                 sc->sc_bkt = ro->ro_bkt;
 1216                 }
 1217                 error = priv_check(td, PRIV_NET_LAGG);
 1218                 if (error)
 1219                         break;
 1220                 if (ro->ro_opts == 0)
 1221                         break;
 1222                 /*
 1223                  * Set options.  LACP options are stored in sc->sc_psc,
 1224                  * not in sc_opts.
 1225                  */
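                      /*
                       * A positive ro_opts value sets the named option and
                       * its arithmetic negation (e.g. -LAGG_OPT_LACP_STRICT)
                       * is used to clear it; both forms are matched below.
                       */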
 1226                 int valid, lacp;
 1227 
 1228                 switch (ro->ro_opts) {
 1229                 case LAGG_OPT_USE_FLOWID:
 1230                 case -LAGG_OPT_USE_FLOWID:
 1231                 case LAGG_OPT_FLOWIDSHIFT:
 1232                         valid = 1;
 1233                         lacp = 0;
 1234                         break;
 1235                 case LAGG_OPT_LACP_TXTEST:
 1236                 case -LAGG_OPT_LACP_TXTEST:
 1237                 case LAGG_OPT_LACP_RXTEST:
 1238                 case -LAGG_OPT_LACP_RXTEST:
 1239                 case LAGG_OPT_LACP_STRICT:
 1240                 case -LAGG_OPT_LACP_STRICT:
 1241                 case LAGG_OPT_LACP_FAST_TIMO:
 1242                 case -LAGG_OPT_LACP_FAST_TIMO:
 1243                         valid = lacp = 1;
 1244                         break;
 1245                 default:
 1246                         valid = lacp = 0;
 1247                         break;
 1248                 }
 1249 
 1250                 LAGG_XLOCK(sc);
 1251 
 1252                 if (valid == 0 ||
 1253                     (lacp == 1 && sc->sc_proto != LAGG_PROTO_LACP)) {
 1254                         /* Invalid combination of options specified. */
 1255                         error = EINVAL;
 1256                         LAGG_XUNLOCK(sc);
 1257                         break;  /* Return from SIOCSLAGGOPTS. */ 
 1258                 }
 1259                 /*
 1260                  * Store new options into sc->sc_opts except for
 1261                  * FLOWIDSHIFT and LACP options.
 1262                  */
 1263                 if (lacp == 0) {
 1264                         if (ro->ro_opts == LAGG_OPT_FLOWIDSHIFT)
 1265                                 sc->flowid_shift = ro->ro_flowid_shift;
 1266                         else if (ro->ro_opts > 0)
 1267                                 sc->sc_opts |= ro->ro_opts;
 1268                         else
 1269                                 sc->sc_opts &= ~ro->ro_opts;
 1270                 } else {
 1271                         struct lacp_softc *lsc;
 1272                         struct lacp_port *lp;
 1273 
 1274                         lsc = (struct lacp_softc *)sc->sc_psc;
 1275 
 1276                         switch (ro->ro_opts) {
 1277                         case LAGG_OPT_LACP_TXTEST:
 1278                                 lsc->lsc_debug.lsc_tx_test = 1;
 1279                                 break;
 1280                         case -LAGG_OPT_LACP_TXTEST:
 1281                                 lsc->lsc_debug.lsc_tx_test = 0;
 1282                                 break;
 1283                         case LAGG_OPT_LACP_RXTEST:
 1284                                 lsc->lsc_debug.lsc_rx_test = 1;
 1285                                 break;
 1286                         case -LAGG_OPT_LACP_RXTEST:
 1287                                 lsc->lsc_debug.lsc_rx_test = 0;
 1288                                 break;
 1289                         case LAGG_OPT_LACP_STRICT:
 1290                                 lsc->lsc_strict_mode = 1;
 1291                                 break;
 1292                         case -LAGG_OPT_LACP_STRICT:
 1293                                 lsc->lsc_strict_mode = 0;
 1294                                 break;
 1295                         case LAGG_OPT_LACP_FAST_TIMO:
 1296                                 LACP_LOCK(lsc);
 1297                                 LIST_FOREACH(lp, &lsc->lsc_ports, lp_next)
 1298                                         lp->lp_state |= LACP_STATE_TIMEOUT;
 1299                                 LACP_UNLOCK(lsc);
 1300                                 lsc->lsc_fast_timeout = 1;
 1301                                 break;
 1302                         case -LAGG_OPT_LACP_FAST_TIMO:
 1303                                 LACP_LOCK(lsc);
 1304                                 LIST_FOREACH(lp, &lsc->lsc_ports, lp_next)
 1305                                         lp->lp_state &= ~LACP_STATE_TIMEOUT;
 1306                                 LACP_UNLOCK(lsc);
 1307                                 lsc->lsc_fast_timeout = 0;
 1308                                 break;
 1309                         }
 1310                 }
 1311                 LAGG_XUNLOCK(sc);
 1312                 break;
 1313         case SIOCGLAGGFLAGS:
 1314                 rf->rf_flags = 0;
 1315                 LAGG_SLOCK(sc);
 1316                 if (sc->sc_flags & MBUF_HASHFLAG_L2)
 1317                         rf->rf_flags |= LAGG_F_HASHL2;
 1318                 if (sc->sc_flags & MBUF_HASHFLAG_L3)
 1319                         rf->rf_flags |= LAGG_F_HASHL3;
 1320                 if (sc->sc_flags & MBUF_HASHFLAG_L4)
 1321                         rf->rf_flags |= LAGG_F_HASHL4;
 1322                 LAGG_SUNLOCK(sc);
 1323                 break;
 1324         case SIOCSLAGGHASH:
 1325                 error = priv_check(td, PRIV_NET_LAGG);
 1326                 if (error)
 1327                         break;
 1328                 if ((rf->rf_flags & LAGG_F_HASHMASK) == 0) {
 1329                         error = EINVAL;
 1330                         break;
 1331                 }
 1332                 LAGG_XLOCK(sc);
 1333                 sc->sc_flags = 0;
 1334                 if (rf->rf_flags & LAGG_F_HASHL2)
 1335                         sc->sc_flags |= MBUF_HASHFLAG_L2;
 1336                 if (rf->rf_flags & LAGG_F_HASHL3)
 1337                         sc->sc_flags |= MBUF_HASHFLAG_L3;
 1338                 if (rf->rf_flags & LAGG_F_HASHL4)
 1339                         sc->sc_flags |= MBUF_HASHFLAG_L4;
 1340                 LAGG_XUNLOCK(sc);
 1341                 break;
 1342         case SIOCGLAGGPORT:
 1343                 if (rp->rp_portname[0] == '\0' ||
 1344                     (tpif = ifunit_ref(rp->rp_portname)) == NULL) {
 1345                         error = EINVAL;
 1346                         break;
 1347                 }
 1348 
 1349                 LAGG_SLOCK(sc);
 1350                 if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
 1351                     lp->lp_softc != sc) {
 1352                         error = ENOENT;
 1353                         LAGG_SUNLOCK(sc);
 1354                         if_rele(tpif);
 1355                         break;
 1356                 }
 1357 
 1358                 lagg_port2req(lp, rp);
 1359                 LAGG_SUNLOCK(sc);
 1360                 if_rele(tpif);
 1361                 break;
 1362         case SIOCSLAGGPORT:
 1363                 error = priv_check(td, PRIV_NET_LAGG);
 1364                 if (error)
 1365                         break;
 1366                 if (rp->rp_portname[0] == '\0' ||
 1367                     (tpif = ifunit_ref(rp->rp_portname)) == NULL) {
 1368                         error = EINVAL;
 1369                         break;
 1370                 }
 1371 #ifdef INET6
 1372                 /*
 1373                  * A laggport interface should not have an inet6 address
 1374                  * because two interfaces with a valid link-local
 1375                  * scope zone must not be merged in any form.  This
 1376                  * restriction is needed to prevent a violation of
 1377                  * the link-local scope zone.  An attempt to add a
 1378                  * laggport interface which has inet6 addresses
 1379                  * triggers removal of all inet6 addresses on the
 1380                  * member interface.
 1381                  */
 1382                 if (in6ifa_llaonifp(tpif)) {
 1383                         in6_ifdetach(tpif);
 1384                         if_printf(sc->sc_ifp,
 1385                             "IPv6 addresses on %s have been removed "
 1386                             "before adding it as a member to prevent "
 1387                             "IPv6 address scope violation.\n",
 1388                             tpif->if_xname);
 1389                 }
 1390 #endif
 1391                 LAGG_XLOCK(sc);
 1392                 error = lagg_port_create(sc, tpif);
 1393                 LAGG_XUNLOCK(sc);
 1394                 if_rele(tpif);
 1395                 VLAN_CAPABILITIES(ifp);
 1396                 break;
 1397         case SIOCSLAGGDELPORT:
 1398                 error = priv_check(td, PRIV_NET_LAGG);
 1399                 if (error)
 1400                         break;
 1401                 if (rp->rp_portname[0] == '\0' ||
 1402                     (tpif = ifunit_ref(rp->rp_portname)) == NULL) {
 1403                         error = EINVAL;
 1404                         break;
 1405                 }
 1406 
 1407                 LAGG_XLOCK(sc);
 1408                 if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
 1409                     lp->lp_softc != sc) {
 1410                         error = ENOENT;
 1411                         LAGG_XUNLOCK(sc);
 1412                         if_rele(tpif);
 1413                         break;
 1414                 }
 1415 
 1416                 error = lagg_port_destroy(lp, 1);
 1417                 LAGG_XUNLOCK(sc);
 1418                 if_rele(tpif);
 1419                 VLAN_CAPABILITIES(ifp);
 1420                 break;
 1421         case SIOCSIFFLAGS:
 1422                 /* Set flags on ports too */
 1423                 LAGG_XLOCK(sc);
 1424                 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1425                         lagg_setflags(lp, 1);
 1426                 }
 1427 
 1428                 if (!(ifp->if_flags & IFF_UP) &&
 1429                     (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 1430                         /*
 1431                          * If interface is marked down and it is running,
 1432                          * then stop and disable it.
 1433                          */
 1434                         lagg_stop(sc);
 1435                         LAGG_XUNLOCK(sc);
 1436                 } else if ((ifp->if_flags & IFF_UP) &&
 1437                     !(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 1438                         /*
 1439                          * If interface is marked up and it is stopped, then
 1440                          * start it.
 1441                          */
 1442                         LAGG_XUNLOCK(sc);
 1443                         (*ifp->if_init)(sc);
 1444                 } else
 1445                         LAGG_XUNLOCK(sc);
 1446                 break;
 1447         case SIOCADDMULTI:
 1448         case SIOCDELMULTI:
 1449                 LAGG_WLOCK(sc);
 1450                 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1451                         lagg_clrmulti(lp);
 1452                         lagg_setmulti(lp);
 1453                 }
 1454                 LAGG_WUNLOCK(sc);
 1455                 error = 0;
 1456                 break;
 1457         case SIOCSIFMEDIA:
 1458         case SIOCGIFMEDIA:
 1459                 error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
 1460                 break;
 1461 
 1462         case SIOCSIFCAP:
 1463                 LAGG_XLOCK(sc);
 1464                 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1465                         if (lp->lp_ioctl != NULL)
 1466                                 (*lp->lp_ioctl)(lp->lp_ifp, cmd, data);
 1467                 }
 1468                 lagg_capabilities(sc);
 1469                 LAGG_XUNLOCK(sc);
 1470                 VLAN_CAPABILITIES(ifp);
 1471                 error = 0;
 1472                 break;
 1473 
 1474         case SIOCSIFMTU:
 1475                 LAGG_XLOCK(sc);
 1476                 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1477                         if (lp->lp_ioctl != NULL)
 1478                                 error = (*lp->lp_ioctl)(lp->lp_ifp, cmd, data);
 1479                         else
 1480                                 error = EINVAL;
 1481                         if (error != 0) {
 1482                                 if_printf(ifp,
 1483                                     "failed to change MTU to %d on port %s, "
 1484                                     "reverting all ports to original MTU (%d)\n",
 1485                                     ifr->ifr_mtu, lp->lp_ifp->if_xname, ifp->if_mtu);
 1486                                 break;
 1487                         }
 1488                 }
 1489                 if (error == 0) {
 1490                         ifp->if_mtu = ifr->ifr_mtu;
 1491                 } else {
 1492                         /* set every port back to the original MTU */
 1493                         ifr->ifr_mtu = ifp->if_mtu;
 1494                         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1495                                 if (lp->lp_ioctl != NULL)
 1496                                         (*lp->lp_ioctl)(lp->lp_ifp, cmd, data);
 1497                         }
 1498                 }
 1499                 LAGG_XUNLOCK(sc);
 1500                 break;
 1501 
 1502         default:
 1503                 error = ether_ioctl(ifp, cmd, data);
 1504                 break;
 1505         }
 1506         return (error);
 1507 }
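
/*
 * Example: a minimal userland sketch of driving the SIOCSLAGGHASH case
 * handled above, selecting L3+L4 hashing.  The interface name "lagg0"
 * is hypothetical and the lagg(4) interface must already exist; only
 * struct lagg_reqflags, the LAGG_F_HASH* flags and the ioctl itself
 * come from <net/if_lagg.h>.
 */
#include <sys/param.h>
#include <sys/ioctl.h>
#include <sys/socket.h>

#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_lagg.h>

#include <err.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
        struct lagg_reqflags rf;
        int s;

        if ((s = socket(AF_LOCAL, SOCK_DGRAM, 0)) < 0)
                err(1, "socket");
        memset(&rf, 0, sizeof(rf));
        strlcpy(rf.rf_ifname, "lagg0", sizeof(rf.rf_ifname));
        rf.rf_flags = LAGG_F_HASHL3 | LAGG_F_HASHL4;    /* hash on L3+L4 */
        if (ioctl(s, SIOCSLAGGHASH, &rf) == -1)
                err(1, "SIOCSLAGGHASH");
        close(s);
        return (0);
}
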
 1508 
 1509 static int
 1510 lagg_setmulti(struct lagg_port *lp)
 1511 {
 1512         struct lagg_softc *sc = lp->lp_softc;
 1513         struct ifnet *ifp = lp->lp_ifp;
 1514         struct ifnet *scifp = sc->sc_ifp;
 1515         struct lagg_mc *mc;
 1516         struct ifmultiaddr *ifma;
 1517         int error;
 1518 
 1519         LAGG_WLOCK_ASSERT(sc);
 1520         IF_ADDR_WLOCK(scifp);
 1521         TAILQ_FOREACH(ifma, &scifp->if_multiaddrs, ifma_link) {
 1522                 if (ifma->ifma_addr->sa_family != AF_LINK)
 1523                         continue;
 1524                 mc = malloc(sizeof(struct lagg_mc), M_DEVBUF, M_NOWAIT);
 1525                 if (mc == NULL) {
 1526                         IF_ADDR_WUNLOCK(scifp);
 1527                         return (ENOMEM);
 1528                 }
 1529                 bcopy(ifma->ifma_addr, &mc->mc_addr,
 1530                     ifma->ifma_addr->sa_len);
 1531                 mc->mc_addr.sdl_index = ifp->if_index;
 1532                 mc->mc_ifma = NULL;
 1533                 SLIST_INSERT_HEAD(&lp->lp_mc_head, mc, mc_entries);
 1534         }
 1535         IF_ADDR_WUNLOCK(scifp);
 1536         SLIST_FOREACH(mc, &lp->lp_mc_head, mc_entries) {
 1537                 error = if_addmulti(ifp,
 1538                     (struct sockaddr *)&mc->mc_addr, &mc->mc_ifma);
 1539                 if (error)
 1540                         return (error);
 1541         }
 1542         return (0);
 1543 }
 1544 
 1545 static int
 1546 lagg_clrmulti(struct lagg_port *lp)
 1547 {
 1548         struct lagg_mc *mc;
 1549 
 1550         LAGG_WLOCK_ASSERT(lp->lp_softc);
 1551         while ((mc = SLIST_FIRST(&lp->lp_mc_head)) != NULL) {
 1552                 SLIST_REMOVE(&lp->lp_mc_head, mc, lagg_mc, mc_entries);
 1553                 if (mc->mc_ifma && lp->lp_detaching == 0)
 1554                         if_delmulti_ifma(mc->mc_ifma);
 1555                 free(mc, M_DEVBUF);
 1556         }
 1557         return (0);
 1558 }
 1559 
 1560 static int
 1561 lagg_setcaps(struct lagg_port *lp, int cap)
 1562 {
 1563         struct ifreq ifr;
 1564 
 1565         if (lp->lp_ifp->if_capenable == cap)
 1566                 return (0);
 1567         if (lp->lp_ioctl == NULL)
 1568                 return (ENXIO);
 1569         ifr.ifr_reqcap = cap;
 1570         return ((*lp->lp_ioctl)(lp->lp_ifp, SIOCSIFCAP, (caddr_t)&ifr));
 1571 }
 1572 
 1573 /* Handle a ref-counted flag that should be set on the lagg port as well */
 1574 static int
 1575 lagg_setflag(struct lagg_port *lp, int flag, int status,
 1576     int (*func)(struct ifnet *, int))
 1577 {
 1578         struct lagg_softc *sc = lp->lp_softc;
 1579         struct ifnet *scifp = sc->sc_ifp;
 1580         struct ifnet *ifp = lp->lp_ifp;
 1581         int error;
 1582 
 1583         LAGG_XLOCK_ASSERT(sc);
 1584 
 1585         status = status ? (scifp->if_flags & flag) : 0;
 1586         /* Now "status" contains the flag value or 0 */
 1587 
 1588         /*
 1589          * See if the recorded port status differs from what we
 1590          * want it to be.  If it does, flip it.  We record the port
 1591          * status in lp_ifflags so that we never clear a port flag
 1592          * we did not set.  In fact, we do not clear or set port
 1593          * flags directly; we acquire or release references to them
 1594          * instead.  That is why the recorded flags are guaranteed
 1595          * to stay in accord with the actual port flags.
 1596          */
 1597         if (status != (lp->lp_ifflags & flag)) {
 1598                 error = (*func)(ifp, status);
 1599                 if (error)
 1600                         return (error);
 1601                 lp->lp_ifflags &= ~flag;
 1602                 lp->lp_ifflags |= status;
 1603         }
 1604         return (0);
 1605 }
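
/*
 * Example: a standalone toy model of the lagg_setflag() logic above.
 * "scflags" stands in for the lagg's if_flags, "lpflags" for the
 * port's lp_ifflags, and the callback for helpers such as ifpromisc();
 * all names here are illustrative, not kernel API.
 */
#include <stdio.h>

#define TOY_PROMISC 0x100                       /* stands in for IFF_PROMISC */

static int
toy_apply(int flag, int on)
{
        printf("flag %#x -> %s\n", flag, on ? "on" : "off");
        return (0);
}

static int
toy_setflag(int scflags, int *lpflags, int flag, int status,
    int (*func)(int, int))
{
        int want;

        /* The desired port state mirrors the lagg's flag, or is 0. */
        want = status ? (scflags & flag) : 0;
        if (want != (*lpflags & flag)) {
                if (func(flag, want) != 0)
                        return (-1);
                /* Record what we set so we never clear a foreign flag. */
                *lpflags = (*lpflags & ~flag) | want;
        }
        return (0);
}

int
main(void)
{
        int lpflags = 0;

        /* The lagg is promiscuous: the port acquires the flag... */
        toy_setflag(TOY_PROMISC, &lpflags, TOY_PROMISC, 1, toy_apply);
        /* ...and a forced clear (status == 0) releases it again. */
        toy_setflag(TOY_PROMISC, &lpflags, TOY_PROMISC, 0, toy_apply);
        return (0);
}
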
 1606 
 1607 /*
 1608  * Handle IFF_* flags that require certain changes on the lagg port:
 1609  * if "status" is true, update the port's flags to match the lagg's;
 1610  * if "status" is false, forcibly clear the flags set on the port.
 1611  */
 1612 static int
 1613 lagg_setflags(struct lagg_port *lp, int status)
 1614 {
 1615         int error, i;
 1616 
 1617         for (i = 0; lagg_pflags[i].flag; i++) {
 1618                 error = lagg_setflag(lp, lagg_pflags[i].flag,
 1619                     status, lagg_pflags[i].func);
 1620                 if (error)
 1621                         return (error);
 1622         }
 1623         return (0);
 1624 }
 1625 
 1626 static int
 1627 lagg_transmit(struct ifnet *ifp, struct mbuf *m)
 1628 {
 1629         struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 1630         int error, len, mcast;
 1631         struct rm_priotracker tracker;
 1632 
 1633         len = m->m_pkthdr.len;
 1634         mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
 1635 
 1636         LAGG_RLOCK(sc, &tracker);
 1637         /* We need a Tx algorithm and at least one port */
 1638         if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
 1639                 LAGG_RUNLOCK(sc, &tracker);
 1640                 m_freem(m);
 1641                 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 1642                 return (ENXIO);
 1643         }
 1644 
 1645         ETHER_BPF_MTAP(ifp, m);
 1646 
 1647         error = lagg_proto_start(sc, m);
 1648         LAGG_RUNLOCK(sc, &tracker);
 1649 
 1650         if (error != 0)
 1651                 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 1652 
 1653         return (error);
 1654 }
 1655 
 1656 /*
 1657  * The ifp->if_qflush entry point for lagg(4) is a no-op.
 1658  */
 1659 static void
 1660 lagg_qflush(struct ifnet *ifp __unused)
 1661 {
 1662 }
 1663 
 1664 static struct mbuf *
 1665 lagg_input(struct ifnet *ifp, struct mbuf *m)
 1666 {
 1667         struct lagg_port *lp = ifp->if_lagg;
 1668         struct lagg_softc *sc = lp->lp_softc;
 1669         struct ifnet *scifp = sc->sc_ifp;
 1670         struct rm_priotracker tracker;
 1671 
 1672         LAGG_RLOCK(sc, &tracker);
 1673         if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
 1674             (lp->lp_flags & LAGG_PORT_DISABLED) ||
 1675             sc->sc_proto == LAGG_PROTO_NONE) {
 1676                 LAGG_RUNLOCK(sc, &tracker);
 1677                 m_freem(m);
 1678                 return (NULL);
 1679         }
 1680 
 1681         ETHER_BPF_MTAP(scifp, m);
 1682 
 1683         if (lp->lp_detaching != 0) {
 1684                 m_freem(m);
 1685                 m = NULL;
 1686         } else
 1687                 m = lagg_proto_input(sc, lp, m);
 1688 
 1689         if (m != NULL) {
 1690                 if (scifp->if_flags & IFF_MONITOR) {
 1691                         m_freem(m);
 1692                         m = NULL;
 1693                 }
 1694         }
 1695 
 1696         LAGG_RUNLOCK(sc, &tracker);
 1697         return (m);
 1698 }
 1699 
 1700 static int
 1701 lagg_media_change(struct ifnet *ifp)
 1702 {
 1703         struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 1704 
 1705         if (sc->sc_ifflags & IFF_DEBUG)
 1706                 printf("%s\n", __func__);
 1707 
 1708         /* Ignore */
 1709         return (0);
 1710 }
 1711 
 1712 static void
 1713 lagg_media_status(struct ifnet *ifp, struct ifmediareq *imr)
 1714 {
 1715         struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 1716         struct lagg_port *lp;
 1717 
 1718         imr->ifm_status = IFM_AVALID;
 1719         imr->ifm_active = IFM_ETHER | IFM_AUTO;
 1720 
 1721         LAGG_SLOCK(sc);
 1722         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1723                 if (LAGG_PORTACTIVE(lp))
 1724                         imr->ifm_status |= IFM_ACTIVE;
 1725         }
 1726         LAGG_SUNLOCK(sc);
 1727 }
 1728 
 1729 static void
 1730 lagg_linkstate(struct lagg_softc *sc)
 1731 {
 1732         struct lagg_port *lp;
 1733         int new_link = LINK_STATE_DOWN;
 1734         uint64_t speed;
 1735 
 1736         LAGG_XLOCK_ASSERT(sc);
 1737 
 1738         /* Our link is considered up if at least one of our ports is active */
 1739         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1740                 if (lp->lp_ifp->if_link_state == LINK_STATE_UP) {
 1741                         new_link = LINK_STATE_UP;
 1742                         break;
 1743                 }
 1744         }
 1745         if_link_state_change(sc->sc_ifp, new_link);
 1746 
 1747         /* Update if_baudrate to reflect the max possible speed */
 1748         switch (sc->sc_proto) {
 1749                 case LAGG_PROTO_FAILOVER:
 1750                         sc->sc_ifp->if_baudrate = sc->sc_primary != NULL ?
 1751                             sc->sc_primary->lp_ifp->if_baudrate : 0;
 1752                         break;
 1753                 case LAGG_PROTO_ROUNDROBIN:
 1754                 case LAGG_PROTO_LOADBALANCE:
 1755                 case LAGG_PROTO_BROADCAST:
 1756                         speed = 0;
 1757                         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 1758                                 speed += lp->lp_ifp->if_baudrate;
 1759                         sc->sc_ifp->if_baudrate = speed;
 1760                         break;
 1761                 case LAGG_PROTO_LACP:
 1762                         /* LACP updates if_baudrate itself */
 1763                         break;
 1764         }
 1765 }
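
/*
 * Example: the if_baudrate bookkeeping above, worked for a hypothetical
 * aggregate of three 1 Gb/s members.  Round-robin, loadbalance and
 * broadcast advertise the sum of the member rates, while failover
 * advertises only the primary's rate; the figures are illustrative.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
        uint64_t port_bps[3] = { 1000000000, 1000000000, 1000000000 };
        uint64_t sum = 0;

        for (int i = 0; i < 3; i++)
                sum += port_bps[i];
        printf("roundrobin/loadbalance/broadcast: %ju bps\n", (uintmax_t)sum);
        printf("failover (primary only):          %ju bps\n",
            (uintmax_t)port_bps[0]);
        return (0);
}
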
 1766 
 1767 static void
 1768 lagg_port_state(struct ifnet *ifp, int state)
 1769 {
 1770         struct lagg_port *lp = (struct lagg_port *)ifp->if_lagg;
 1771         struct lagg_softc *sc = NULL;
 1772 
 1773         if (lp != NULL)
 1774                 sc = lp->lp_softc;
 1775         if (sc == NULL)
 1776                 return;
 1777 
 1778         LAGG_XLOCK(sc);
 1779         lagg_linkstate(sc);
 1780         lagg_proto_linkstate(sc, lp);
 1781         LAGG_XUNLOCK(sc);
 1782 }
 1783 
 1784 struct lagg_port *
 1785 lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp)
 1786 {
 1787         struct lagg_port *lp_next, *rval = NULL;
 1788 
 1789         /*
 1790          * Search for a port that reports an active link state.
 1791          */
 1792 
 1793         if (lp == NULL)
 1794                 goto search;
 1795         if (LAGG_PORTACTIVE(lp)) {
 1796                 rval = lp;
 1797                 goto found;
 1798         }
 1799         if ((lp_next = SLIST_NEXT(lp, lp_entries)) != NULL &&
 1800             LAGG_PORTACTIVE(lp_next)) {
 1801                 rval = lp_next;
 1802                 goto found;
 1803         }
 1804 
 1805 search:
 1806         SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
 1807                 if (LAGG_PORTACTIVE(lp_next)) {
 1808                         rval = lp_next;
 1809                         goto found;
 1810                 }
 1811         }
 1812 
 1813 found:
 1814         return (rval);
 1815 }
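
/*
 * Example: a standalone model of the search order implemented above.
 * The preferred port is tried first, then its list successor, then the
 * whole list from the head; -1 plays the role of the NULL return.  The
 * array of link states is an illustrative stand-in for the port list.
 */
#include <stdio.h>

static int
toy_link_active(const int *active, int nports, int preferred)
{
        if (preferred >= 0 && preferred < nports) {
                if (active[preferred])
                        return (preferred);
                if (preferred + 1 < nports && active[preferred + 1])
                        return (preferred + 1);
        }
        for (int i = 0; i < nports; i++)
                if (active[i])
                        return (i);
        return (-1);                    /* no active port at all */
}

int
main(void)
{
        int active[3] = { 0, 0, 1 };    /* only the third port is up */

        printf("preferred 0 -> port %d\n", toy_link_active(active, 3, 0));
        printf("preferred 2 -> port %d\n", toy_link_active(active, 3, 2));
        return (0);
}
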
 1816 
 1817 int
 1818 lagg_enqueue(struct ifnet *ifp, struct mbuf *m)
 1819 {
 1820 
 1821         return (ifp->if_transmit)(ifp, m);
 1822 }
 1823 
 1824 /*
 1825  * Simple round robin aggregation
 1826  */
 1827 static void
 1828 lagg_rr_attach(struct lagg_softc *sc)
 1829 {
 1830         sc->sc_seq = 0;
 1831         sc->sc_bkt_count = sc->sc_bkt;
 1832 }
 1833 
 1834 static int
 1835 lagg_rr_start(struct lagg_softc *sc, struct mbuf *m)
 1836 {
 1837         struct lagg_port *lp;
 1838         uint32_t p;
 1839 
 1840         if (sc->sc_bkt_count == 0 && sc->sc_bkt > 0)
 1841                 sc->sc_bkt_count = sc->sc_bkt;
 1842 
 1843         if (sc->sc_bkt > 0) {
 1844                 atomic_subtract_int(&sc->sc_bkt_count, 1);
 1845                 if (atomic_cmpset_int(&sc->sc_bkt_count, 0, sc->sc_bkt))
 1846                         p = atomic_fetchadd_32(&sc->sc_seq, 1);
 1847                 else
 1848                         p = sc->sc_seq;
 1849         } else
 1850                 p = atomic_fetchadd_32(&sc->sc_seq, 1);
 1851 
 1852         p %= sc->sc_count;
 1853         lp = SLIST_FIRST(&sc->sc_ports);
 1854 
 1855         while (p--)
 1856                 lp = SLIST_NEXT(lp, lp_entries);
 1857 
 1858         /*
 1859          * Check the port's link state.  lagg_link_active() returns the
 1860          * next active port if this port's link is down or lp is NULL.
 1861          */
 1862         if ((lp = lagg_link_active(sc, lp)) == NULL) {
 1863                 m_freem(m);
 1864                 return (ENETDOWN);
 1865         }
 1866 
 1867         /* Send mbuf */
 1868         return (lagg_enqueue(lp->lp_ifp, m));
 1869 }
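
/*
 * Example: a standalone model of the selection above.  A shared
 * sequence number is advanced per packet, or only once every "bkt"
 * packets when buckets are enabled, and is reduced modulo the port
 * count.  The kernel's atomics and port-list walk are elided; all
 * names are illustrative.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t seq;                    /* models sc->sc_seq */
static int bkt_count;                   /* models sc->sc_bkt_count */

static uint32_t
rr_pick(uint32_t nports, int bkt)
{
        uint32_t p;

        if (bkt > 0) {
                if (--bkt_count <= 0) {
                        bkt_count = bkt;        /* bucket drained: advance */
                        p = seq++;
                } else
                        p = seq;                /* stay on the same port */
        } else
                p = seq++;
        return (p % nports);
}

int
main(void)
{
        bkt_count = 2;
        for (int i = 0; i < 8; i++)
                printf("packet %d -> port %u\n", i, rr_pick(3, 2));
        return (0);
}
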
 1870 
 1871 static struct mbuf *
 1872 lagg_rr_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
 1873 {
 1874         struct ifnet *ifp = sc->sc_ifp;
 1875 
 1876         /* Just pass the packet up to our lagg device */
 1877         m->m_pkthdr.rcvif = ifp;
 1878 
 1879         return (m);
 1880 }
 1881 
 1882 /*
 1883  * Broadcast mode
 1884  */
 1885 static int
 1886 lagg_bcast_start(struct lagg_softc *sc, struct mbuf *m)
 1887 {
 1888         int active_ports = 0;
 1889         int errors = 0;
 1890         int ret;
 1891         struct lagg_port *lp, *last = NULL;
 1892         struct mbuf *m0;
 1893 
 1894         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1895                 if (!LAGG_PORTACTIVE(lp))
 1896                         continue;
 1897 
 1898                 active_ports++;
 1899 
 1900                 if (last != NULL) {
 1901                         m0 = m_copym(m, 0, M_COPYALL, M_NOWAIT);
 1902                         if (m0 == NULL) {
 1903                                 ret = ENOBUFS;
 1904                                 errors++;
 1905                                 break;
 1906                         }
 1907 
 1908                         ret = lagg_enqueue(last->lp_ifp, m0);
 1909                         if (ret != 0)
 1910                                 errors++;
 1911                 }
 1912                 last = lp;
 1913         }
 1914         if (last == NULL) {
 1915                 m_freem(m);
 1916                 return (ENOENT);
 1917         }
 1918         if ((last = lagg_link_active(sc, last)) == NULL) {
 1919                 m_freem(m);
 1920                 return (ENETDOWN);
 1921         }
 1922 
 1923         ret = lagg_enqueue(last->lp_ifp, m);
 1924         if (ret != 0)
 1925                 errors++;
 1926         /* Per-port transmit errors are not propagated to the caller. */
 1927         if (errors == 0)
 1928                 return (ret);
 1929 
 1930         return (0);
 1931 }
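
/*
 * Example: the deferred-send pattern used above, modeled without mbufs.
 * Each newly found active port causes the previously remembered one to
 * receive a copy (m_copym() in the real code), and the original buffer
 * goes out on the last active port, saving one copy.  The port names
 * are hypothetical.
 */
#include <stdio.h>

int
main(void)
{
        const char *ports[3] = { "em0", "em1", "em2" };
        const char *last = NULL;

        for (int i = 0; i < 3; i++) {
                if (last != NULL)
                        printf("send copy on %s\n", last);
                last = ports[i];
        }
        if (last != NULL)
                printf("send original on %s\n", last);
        return (0);
}
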
 1932 
 1933 static struct mbuf *
 1934 lagg_bcast_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
 1935 {
 1936         struct ifnet *ifp = sc->sc_ifp;
 1937 
 1938         /* Just pass the packet up to our lagg device */
 1939         m->m_pkthdr.rcvif = ifp;
 1940         return (m);
 1941 }
 1942 
 1943 /*
 1944  * Active failover
 1945  */
 1946 static int
 1947 lagg_fail_start(struct lagg_softc *sc, struct mbuf *m)
 1948 {
 1949         struct lagg_port *lp;
 1950 
 1951         /* Use the master port if it is active, else the next available port */
 1952         if ((lp = lagg_link_active(sc, sc->sc_primary)) == NULL) {
 1953                 m_freem(m);
 1954                 return (ENETDOWN);
 1955         }
 1956 
 1957         /* Send mbuf */
 1958         return (lagg_enqueue(lp->lp_ifp, m));
 1959 }
 1960 
 1961 static struct mbuf *
 1962 lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
 1963 {
 1964         struct ifnet *ifp = sc->sc_ifp;
 1965         struct lagg_port *tmp_tp;
 1966 
 1967         if (lp == sc->sc_primary || V_lagg_failover_rx_all) {
 1968                 m->m_pkthdr.rcvif = ifp;
 1969                 return (m);
 1970         }
 1971 
 1972         if (!LAGG_PORTACTIVE(sc->sc_primary)) {
 1973                 tmp_tp = lagg_link_active(sc, sc->sc_primary);
 1974                 /*
 1975                  * If tmp_tp is NULL, we have received a packet while all
 1976                  * our links are down.  Weird, but process it anyway.
 1977                  */
 1978                 if (tmp_tp == NULL || tmp_tp == lp) {
 1979                         m->m_pkthdr.rcvif = ifp;
 1980                         return (m);
 1981                 }
 1982         }
 1983 
 1984         m_freem(m);
 1985         return (NULL);
 1986 }
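
/*
 * Example: a simplified standalone model of the receive policy above.
 * A frame is accepted from the primary port (or from any port when the
 * failover_rx_all sysctl is set); from a backup port it is accepted
 * only while the primary is down and that port is the active
 * substitute (or no link is up at all).  The flags compress the
 * kernel's checks and are illustrative.
 */
#include <stdbool.h>
#include <stdio.h>

static bool
fail_accept(bool from_primary, bool rx_all, bool primary_up,
    bool is_active_backup)
{
        if (from_primary || rx_all)
                return (true);
        if (!primary_up && is_active_backup)
                return (true);
        return (false);
}

int
main(void)
{
        printf("backup port, primary up:   %s\n",
            fail_accept(false, false, true, false) ? "accept" : "drop");
        printf("backup port, primary down: %s\n",
            fail_accept(false, false, false, true) ? "accept" : "drop");
        return (0);
}
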
 1987 
 1988 /*
 1989  * Loadbalancing
 1990  */
 1991 static void
 1992 lagg_lb_attach(struct lagg_softc *sc)
 1993 {
 1994         struct lagg_port *lp;
 1995         struct lagg_lb *lb;
 1996 
 1997         lb = malloc(sizeof(struct lagg_lb), M_DEVBUF, M_WAITOK | M_ZERO);
 1998         lb->lb_key = m_ether_tcpip_hash_init();
 1999         sc->sc_psc = lb;
 2000 
 2001         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 2002                 lagg_lb_port_create(lp);
 2003 }
 2004 
 2005 static void
 2006 lagg_lb_detach(struct lagg_softc *sc)
 2007 {
 2008         struct lagg_lb *lb;
 2009 
 2010         lb = (struct lagg_lb *)sc->sc_psc;
 2011         LAGG_WUNLOCK(sc);
 2012         if (lb != NULL)
 2013                 free(lb, M_DEVBUF);
 2014 }
 2015 
 2016 static int
 2017 lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp)
 2018 {
 2019         struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
 2020         struct lagg_port *lp_next;
 2021         int i = 0;
 2022 
 2023         bzero(&lb->lb_ports, sizeof(lb->lb_ports));
 2024         SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
 2025                 if (lp_next == lp)
 2026                         continue;
 2027                 if (i >= LAGG_MAX_PORTS)
 2028                         return (EINVAL);
 2029                 if (sc->sc_ifflags & IFF_DEBUG)
 2030                         printf("%s: port %s at index %d\n",
 2031                             sc->sc_ifname, lp_next->lp_ifp->if_xname, i);
 2032                 lb->lb_ports[i++] = lp_next;
 2033         }
 2034 
 2035         return (0);
 2036 }
 2037 
 2038 static int
 2039 lagg_lb_port_create(struct lagg_port *lp)
 2040 {
 2041         struct lagg_softc *sc = lp->lp_softc;
 2042         return (lagg_lb_porttable(sc, NULL));
 2043 }
 2044 
 2045 static void
 2046 lagg_lb_port_destroy(struct lagg_port *lp)
 2047 {
 2048         struct lagg_softc *sc = lp->lp_softc;
 2049         lagg_lb_porttable(sc, lp);
 2050 }
 2051 
 2052 static int
 2053 lagg_lb_start(struct lagg_softc *sc, struct mbuf *m)
 2054 {
 2055         struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
 2056         struct lagg_port *lp = NULL;
 2057         uint32_t p = 0;
 2058 
 2059         if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) &&
 2060             M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
 2061                 p = m->m_pkthdr.flowid >> sc->flowid_shift;
 2062         else
 2063                 p = m_ether_tcpip_hash(sc->sc_flags, m, lb->lb_key);
 2064         p %= sc->sc_count;
 2065         lp = lb->lb_ports[p];
 2066 
 2067         /*
 2068          * Check the port's link state.  lagg_link_active() returns the
 2069          * next active port if this port's link is down or lp is NULL.
 2070          */
 2071         if ((lp = lagg_link_active(sc, lp)) == NULL) {
 2072                 m_freem(m);
 2073                 return (ENETDOWN);
 2074         }
 2075 
 2076         /* Send mbuf */
 2077         return (lagg_enqueue(lp->lp_ifp, m));
 2078 }
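
/*
 * Example: a standalone model of the port selection above.  When the
 * NIC supplied an RSS flow ID it is used directly (shifted right by
 * flowid_shift); otherwise a software hash of the configured L2/L3/L4
 * headers is used.  Either way the value is reduced modulo the port
 * count, so packets of one flow always map to the same port.  The
 * numbers are illustrative; the real hash is m_ether_tcpip_hash().
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t
lb_pick(int have_flowid, uint32_t flowid, int shift, uint32_t swhash,
    uint32_t nports)
{
        uint32_t p;

        p = have_flowid ? (flowid >> shift) : swhash;
        return (p % nports);
}

int
main(void)
{
        /* The same flow ID always selects the same port. */
        printf("flow 0x1234 -> port %u\n", lb_pick(1, 0x1234, 0, 0, 3));
        printf("flow 0x1234 -> port %u\n", lb_pick(1, 0x1234, 0, 0, 3));
        printf("flow 0x1235 -> port %u\n", lb_pick(1, 0x1235, 0, 0, 3));
        return (0);
}
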
 2079 
 2080 static struct mbuf *
 2081 lagg_lb_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
 2082 {
 2083         struct ifnet *ifp = sc->sc_ifp;
 2084 
 2085         /* Just pass the packet up to our lagg device */
 2086         m->m_pkthdr.rcvif = ifp;
 2087 
 2088         return (m);
 2089 }
 2090 
 2091 /*
 2092  * 802.3ad LACP
 2093  */
 2094 static void
 2095 lagg_lacp_attach(struct lagg_softc *sc)
 2096 {
 2097         struct lagg_port *lp;
 2098 
 2099         lacp_attach(sc);
 2100         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 2101                 lacp_port_create(lp);
 2102 }
 2103 
 2104 static void
 2105 lagg_lacp_detach(struct lagg_softc *sc)
 2106 {
 2107         struct lagg_port *lp;
 2108         void *psc;
 2109 
 2110         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 2111                 lacp_port_destroy(lp);
 2112 
 2113         psc = sc->sc_psc;
 2114         sc->sc_psc = NULL;
 2115         LAGG_WUNLOCK(sc);
 2116 
 2117         lacp_detach(psc);
 2118 }
 2119 
 2120 static void
 2121 lagg_lacp_lladdr(struct lagg_softc *sc)
 2122 {
 2123         struct lagg_port *lp;
 2124 
 2125         LAGG_SXLOCK_ASSERT(sc);
 2126 
 2127         /* purge all the lacp ports */
 2128         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 2129                 lacp_port_destroy(lp);
 2130 
 2131         /* add them back in */
 2132         SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 2133                 lacp_port_create(lp);
 2134 }
 2135 
 2136 static int
 2137 lagg_lacp_start(struct lagg_softc *sc, struct mbuf *m)
 2138 {
 2139         struct lagg_port *lp;
 2140 
 2141         lp = lacp_select_tx_port(sc, m);
 2142         if (lp == NULL) {
 2143                 m_freem(m);
 2144                 return (ENETDOWN);
 2145         }
 2146 
 2147         /* Send mbuf */
 2148         return (lagg_enqueue(lp->lp_ifp, m));
 2149 }
 2150 
 2151 static struct mbuf *
 2152 lagg_lacp_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
 2153 {
 2154         struct ifnet *ifp = sc->sc_ifp;
 2155         struct ether_header *eh;
 2156         u_short etype;
 2157 
 2158         eh = mtod(m, struct ether_header *);
 2159         etype = ntohs(eh->ether_type);
 2160 
 2161         /* Tap off LACP control messages */
 2162         if ((m->m_flags & M_VLANTAG) == 0 && etype == ETHERTYPE_SLOW) {
 2163                 m = lacp_input(lp, m);
 2164                 if (m == NULL)
 2165                         return (NULL);
 2166         }
 2167 
 2168         /*
 2169          * If the port is not collecting or not in the active aggregator then
 2170          * free and return.
 2171          */
 2172         if (lacp_iscollecting(lp) == 0 || lacp_isactive(lp) == 0) {
 2173                 m_freem(m);
 2174                 return (NULL);
 2175         }
 2176 
 2177         m->m_pkthdr.rcvif = ifp;
 2178         return (m);
 2179 }
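
/*
 * Example: a standalone sketch of the classification above.  Untagged
 * frames with the IEEE 802.3 slow-protocols ethertype (0x8809, i.e.
 * ETHERTYPE_SLOW) carry LACP/marker PDUs and are diverted to the LACP
 * machinery; anything else is delivered only when the receiving port
 * is collecting and belongs to the active aggregator.  This collapses
 * lacp_input()'s consume-or-return behavior into a plain label.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TOY_ETHERTYPE_SLOW 0x8809

static const char *
lacp_classify(bool vlan_tagged, uint16_t etype, bool collecting, bool active)
{
        if (!vlan_tagged && etype == TOY_ETHERTYPE_SLOW)
                return ("to LACP state machine");
        if (!collecting || !active)
                return ("dropped");
        return ("up the stack via the lagg interface");
}

int
main(void)
{
        printf("%s\n", lacp_classify(false, TOY_ETHERTYPE_SLOW, true, true));
        printf("%s\n", lacp_classify(false, 0x0800, true, true));
        printf("%s\n", lacp_classify(false, 0x0800, false, true));
        return (0);
}
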
 2180 
