FreeBSD/Linux Kernel Cross Reference
sys/net/if_lagg.c


    1 /*      $OpenBSD: if_trunk.c,v 1.30 2007/01/31 06:20:19 reyk Exp $      */
    2 
    3 /*
    4  * Copyright (c) 2005, 2006 Reyk Floeter <reyk@openbsd.org>
    5  * Copyright (c) 2007 Andrew Thompson <thompsa@FreeBSD.org>
    6  * Copyright (c) 2014, 2016 Marcelo Araujo <araujo@FreeBSD.org>
    7  *
    8  * Permission to use, copy, modify, and distribute this software for any
    9  * purpose with or without fee is hereby granted, provided that the above
   10  * copyright notice and this permission notice appear in all copies.
   11  *
   12  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
   13  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
   14  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
   15  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
   16  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
   17  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
   18  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
   19  */
   20 
   21 #include <sys/cdefs.h>
   22 __FBSDID("$FreeBSD$");
   23 
   24 #include "opt_inet.h"
   25 #include "opt_inet6.h"
   26 #include "opt_kern_tls.h"
   27 #include "opt_ratelimit.h"
   28 
   29 #include <sys/param.h>
   30 #include <sys/kernel.h>
   31 #include <sys/malloc.h>
   32 #include <sys/mbuf.h>
   33 #include <sys/queue.h>
   34 #include <sys/socket.h>
   35 #include <sys/sockio.h>
   36 #include <sys/sysctl.h>
   37 #include <sys/module.h>
   38 #include <sys/priv.h>
   39 #include <sys/systm.h>
   40 #include <sys/proc.h>
   41 #include <sys/lock.h>
   42 #include <sys/rmlock.h>
   43 #include <sys/sx.h>
   44 #include <sys/taskqueue.h>
   45 #include <sys/eventhandler.h>
   46 
   47 #include <net/ethernet.h>
   48 #include <net/if.h>
   49 #include <net/if_clone.h>
   50 #include <net/if_arp.h>
   51 #include <net/if_dl.h>
   52 #include <net/if_media.h>
   53 #include <net/if_types.h>
   54 #include <net/if_var.h>
   55 #include <net/if_private.h>
   56 #include <net/bpf.h>
   57 #include <net/route.h>
   58 #include <net/vnet.h>
   59 #include <net/infiniband.h>
   60 
   61 #if defined(INET) || defined(INET6)
   62 #include <netinet/in.h>
   63 #include <netinet/ip.h>
   64 #endif
   65 #ifdef INET
   66 #include <netinet/in_systm.h>
   67 #include <netinet/if_ether.h>
   68 #endif
   69 
   70 #ifdef INET6
   71 #include <netinet/ip6.h>
   72 #include <netinet6/in6_var.h>
   73 #include <netinet6/in6_ifattach.h>
   74 #endif
   75 
   76 #include <net/if_vlan_var.h>
   77 #include <net/if_lagg.h>
   78 #include <net/ieee8023ad_lacp.h>
   79 
   80 #ifdef INET6
   81 /*
   82  * XXX: declared here to avoid including many inet6-related files;
   83  * should this be more generalized?
   84  */
   85 extern void     nd6_setmtu(struct ifnet *);
   86 #endif
   87 
   88 #ifdef DEV_NETMAP
   89 MODULE_DEPEND(if_lagg, netmap, 1, 1, 1);
   90 #endif
   91 
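/*
 * Locking model, as used throughout this file: the per-softc sx lock
 * taken via LAGG_XLOCK() serializes configuration changes (ioctls,
 * port attach/detach), while the packet paths walk sc_ports under the
 * network epoch (NET_EPOCH_ENTER) rather than taking a lock.
 */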
   92 #define LAGG_SX_INIT(_sc)       sx_init(&(_sc)->sc_sx, "if_lagg sx")
   93 #define LAGG_SX_DESTROY(_sc)    sx_destroy(&(_sc)->sc_sx)
   94 #define LAGG_XLOCK(_sc)         sx_xlock(&(_sc)->sc_sx)
   95 #define LAGG_XUNLOCK(_sc)       sx_xunlock(&(_sc)->sc_sx)
   96 #define LAGG_SXLOCK_ASSERT(_sc) sx_assert(&(_sc)->sc_sx, SA_LOCKED)
   97 #define LAGG_XLOCK_ASSERT(_sc)  sx_assert(&(_sc)->sc_sx, SA_XLOCKED)
   98 
   99 /* Special flags we should propagate to the lagg ports. */
  100 static struct {
  101         int flag;
  102         int (*func)(struct ifnet *, int);
  103 } lagg_pflags[] = {
  104         {IFF_PROMISC, ifpromisc},
  105         {IFF_ALLMULTI, if_allmulti},
  106         {0, NULL}
  107 };
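/*
 * A minimal sketch (not part of the driver) of how a {flag, func}
 * table like lagg_pflags is typically walked; the real loop lives in
 * lagg_setflags(), later in this file:
 *
 *	for (i = 0; lagg_pflags[i].flag != 0; i++) {
 *		error = lagg_setflag(lp, lagg_pflags[i].flag, status,
 *		    lagg_pflags[i].func);
 *		if (error)
 *			return (error);
 *	}
 */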
  108 
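/*
 * A lagg send tag wraps the tag allocated from the selected child
 * port: `com' is the tag handed back to the stack, while `tag'
 * holds a reference to the underlying port's tag that does the
 * actual work.
 */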
  109 struct lagg_snd_tag {
  110         struct m_snd_tag com;
  111         struct m_snd_tag *tag;
  112 };
  113 
  114 VNET_DEFINE(SLIST_HEAD(__trhead, lagg_softc), lagg_list); /* list of laggs */
  115 #define V_lagg_list     VNET(lagg_list)
  116 VNET_DEFINE_STATIC(struct mtx, lagg_list_mtx);
  117 #define V_lagg_list_mtx VNET(lagg_list_mtx)
  118 #define LAGG_LIST_LOCK_INIT(x)          mtx_init(&V_lagg_list_mtx, \
  119                                         "if_lagg list", NULL, MTX_DEF)
  120 #define LAGG_LIST_LOCK_DESTROY(x)       mtx_destroy(&V_lagg_list_mtx)
  121 #define LAGG_LIST_LOCK(x)               mtx_lock(&V_lagg_list_mtx)
  122 #define LAGG_LIST_UNLOCK(x)             mtx_unlock(&V_lagg_list_mtx)
  123 eventhandler_tag        lagg_detach_cookie = NULL;
  124 
  125 static int      lagg_clone_create(struct if_clone *, char *, size_t,
  126                     struct ifc_data *, struct ifnet **);
  127 static int      lagg_clone_destroy(struct if_clone *, struct ifnet *, uint32_t);
  128 VNET_DEFINE_STATIC(struct if_clone *, lagg_cloner);
  129 #define V_lagg_cloner   VNET(lagg_cloner)
  130 static const char laggname[] = "lagg";
  131 static MALLOC_DEFINE(M_LAGG, laggname, "802.3AD Link Aggregation Interface");
  132 
  133 static void     lagg_capabilities(struct lagg_softc *);
  134 static int      lagg_port_create(struct lagg_softc *, struct ifnet *);
  135 static int      lagg_port_destroy(struct lagg_port *, int);
  136 static struct mbuf *lagg_input_ethernet(struct ifnet *, struct mbuf *);
  137 static struct mbuf *lagg_input_infiniband(struct ifnet *, struct mbuf *);
  138 static void     lagg_linkstate(struct lagg_softc *);
  139 static void     lagg_port_state(struct ifnet *, int);
  140 static int      lagg_port_ioctl(struct ifnet *, u_long, caddr_t);
  141 static int      lagg_port_output(struct ifnet *, struct mbuf *,
  142                     const struct sockaddr *, struct route *);
  143 static void     lagg_port_ifdetach(void *arg __unused, struct ifnet *);
  144 #ifdef LAGG_PORT_STACKING
  145 static int      lagg_port_checkstacking(struct lagg_softc *);
  146 #endif
  147 static void     lagg_port2req(struct lagg_port *, struct lagg_reqport *);
  148 static void     lagg_init(void *);
  149 static void     lagg_stop(struct lagg_softc *);
  150 static int      lagg_ioctl(struct ifnet *, u_long, caddr_t);
  151 #if defined(KERN_TLS) || defined(RATELIMIT)
  152 static int      lagg_snd_tag_alloc(struct ifnet *,
  153                     union if_snd_tag_alloc_params *,
  154                     struct m_snd_tag **);
  155 static int      lagg_snd_tag_modify(struct m_snd_tag *,
  156                     union if_snd_tag_modify_params *);
  157 static int      lagg_snd_tag_query(struct m_snd_tag *,
  158                     union if_snd_tag_query_params *);
  159 static void     lagg_snd_tag_free(struct m_snd_tag *);
  160 static struct m_snd_tag *lagg_next_snd_tag(struct m_snd_tag *);
  161 static void     lagg_ratelimit_query(struct ifnet *,
  162                     struct if_ratelimit_query_results *);
  163 #endif
  164 static int      lagg_setmulti(struct lagg_port *);
  165 static int      lagg_clrmulti(struct lagg_port *);
  166 static  void    lagg_setcaps(struct lagg_port *, int cap, int cap2);
  167 static  int     lagg_setflag(struct lagg_port *, int, int,
  168                     int (*func)(struct ifnet *, int));
  169 static  int     lagg_setflags(struct lagg_port *, int status);
  170 static uint64_t lagg_get_counter(struct ifnet *ifp, ift_counter cnt);
  171 static int      lagg_transmit_ethernet(struct ifnet *, struct mbuf *);
  172 static int      lagg_transmit_infiniband(struct ifnet *, struct mbuf *);
  173 static void     lagg_qflush(struct ifnet *);
  174 static int      lagg_media_change(struct ifnet *);
  175 static void     lagg_media_status(struct ifnet *, struct ifmediareq *);
  176 static struct lagg_port *lagg_link_active(struct lagg_softc *,
  177             struct lagg_port *);
  178 
  179 /* Simple round robin */
  180 static void     lagg_rr_attach(struct lagg_softc *);
  181 static int      lagg_rr_start(struct lagg_softc *, struct mbuf *);
  182 static struct mbuf *lagg_rr_input(struct lagg_softc *, struct lagg_port *,
  183                     struct mbuf *);
  184 
  185 /* Active failover */
  186 static int      lagg_fail_start(struct lagg_softc *, struct mbuf *);
  187 static struct mbuf *lagg_fail_input(struct lagg_softc *, struct lagg_port *,
  188                     struct mbuf *);
  189 
  190 /* Loadbalancing */
  191 static void     lagg_lb_attach(struct lagg_softc *);
  192 static void     lagg_lb_detach(struct lagg_softc *);
  193 static int      lagg_lb_port_create(struct lagg_port *);
  194 static void     lagg_lb_port_destroy(struct lagg_port *);
  195 static int      lagg_lb_start(struct lagg_softc *, struct mbuf *);
  196 static struct mbuf *lagg_lb_input(struct lagg_softc *, struct lagg_port *,
  197                     struct mbuf *);
  198 static int      lagg_lb_porttable(struct lagg_softc *, struct lagg_port *);
  199 
  200 /* Broadcast */
  201 static int      lagg_bcast_start(struct lagg_softc *, struct mbuf *);
  202 static struct mbuf *lagg_bcast_input(struct lagg_softc *, struct lagg_port *,
  203                     struct mbuf *);
  204 
  205 /* 802.3ad LACP */
  206 static void     lagg_lacp_attach(struct lagg_softc *);
  207 static void     lagg_lacp_detach(struct lagg_softc *);
  208 static int      lagg_lacp_start(struct lagg_softc *, struct mbuf *);
  209 static struct mbuf *lagg_lacp_input(struct lagg_softc *, struct lagg_port *,
  210                     struct mbuf *);
  211 static void     lagg_lacp_lladdr(struct lagg_softc *);
  212 
  213 /* lagg protocol table */
  214 static const struct lagg_proto {
  215         lagg_proto      pr_num;
  216         void            (*pr_attach)(struct lagg_softc *);
  217         void            (*pr_detach)(struct lagg_softc *);
  218         int             (*pr_start)(struct lagg_softc *, struct mbuf *);
  219         struct mbuf *   (*pr_input)(struct lagg_softc *, struct lagg_port *,
  220                             struct mbuf *);
  221         int             (*pr_addport)(struct lagg_port *);
  222         void            (*pr_delport)(struct lagg_port *);
  223         void            (*pr_linkstate)(struct lagg_port *);
  224         void            (*pr_init)(struct lagg_softc *);
  225         void            (*pr_stop)(struct lagg_softc *);
  226         void            (*pr_lladdr)(struct lagg_softc *);
  227         void            (*pr_request)(struct lagg_softc *, void *);
  228         void            (*pr_portreq)(struct lagg_port *, void *);
  229 } lagg_protos[] = {
  230     {
  231         .pr_num = LAGG_PROTO_NONE
  232     },
  233     {
  234         .pr_num = LAGG_PROTO_ROUNDROBIN,
  235         .pr_attach = lagg_rr_attach,
  236         .pr_start = lagg_rr_start,
  237         .pr_input = lagg_rr_input,
  238     },
  239     {
  240         .pr_num = LAGG_PROTO_FAILOVER,
  241         .pr_start = lagg_fail_start,
  242         .pr_input = lagg_fail_input,
  243     },
  244     {
  245         .pr_num = LAGG_PROTO_LOADBALANCE,
  246         .pr_attach = lagg_lb_attach,
  247         .pr_detach = lagg_lb_detach,
  248         .pr_start = lagg_lb_start,
  249         .pr_input = lagg_lb_input,
  250         .pr_addport = lagg_lb_port_create,
  251         .pr_delport = lagg_lb_port_destroy,
  252     },
  253     {
  254         .pr_num = LAGG_PROTO_LACP,
  255         .pr_attach = lagg_lacp_attach,
  256         .pr_detach = lagg_lacp_detach,
  257         .pr_start = lagg_lacp_start,
  258         .pr_input = lagg_lacp_input,
  259         .pr_addport = lacp_port_create,
  260         .pr_delport = lacp_port_destroy,
  261         .pr_linkstate = lacp_linkstate,
  262         .pr_init = lacp_init,
  263         .pr_stop = lacp_stop,
  264         .pr_lladdr = lagg_lacp_lladdr,
  265         .pr_request = lacp_req,
  266         .pr_portreq = lacp_portreq,
  267     },
  268     {
  269         .pr_num = LAGG_PROTO_BROADCAST,
  270         .pr_start = lagg_bcast_start,
  271         .pr_input = lagg_bcast_input,
  272     },
  273 };
  274 
  275 SYSCTL_DECL(_net_link);
  276 SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
  277     "Link Aggregation");
  278 
  279 /* Allow input on any failover links */
  280 VNET_DEFINE_STATIC(int, lagg_failover_rx_all);
  281 #define V_lagg_failover_rx_all  VNET(lagg_failover_rx_all)
  282 SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW | CTLFLAG_VNET,
  283     &VNET_NAME(lagg_failover_rx_all), 0,
  284     "Accept input from any interface in a failover lagg");
  285 
  286 /* Default value for using flowid */
  287 VNET_DEFINE_STATIC(int, def_use_flowid) = 0;
  288 #define V_def_use_flowid        VNET(def_use_flowid)
  289 SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RWTUN,
  290     &VNET_NAME(def_use_flowid), 0,
  291     "Default setting for using flow id for load sharing");
  292 
  293 /* Default value for using numa */
  294 VNET_DEFINE_STATIC(int, def_use_numa) = 1;
  295 #define V_def_use_numa  VNET(def_use_numa)
  296 SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_numa, CTLFLAG_RWTUN,
  297     &VNET_NAME(def_use_numa), 0,
  298     "Use numa to steer flows");
  299 
  300 /* Default value for flowid shift */
  301 VNET_DEFINE_STATIC(int, def_flowid_shift) = 16;
  302 #define V_def_flowid_shift      VNET(def_flowid_shift)
  303 SYSCTL_INT(_net_link_lagg, OID_AUTO, default_flowid_shift, CTLFLAG_RWTUN,
  304     &VNET_NAME(def_flowid_shift), 0,
  305     "Default setting for flowid shift for load sharing");
  306 
  307 static void
  308 vnet_lagg_init(const void *unused __unused)
  309 {
  310 
  311         LAGG_LIST_LOCK_INIT();
  312         SLIST_INIT(&V_lagg_list);
  313         struct if_clone_addreq req = {
  314                 .create_f = lagg_clone_create,
  315                 .destroy_f = lagg_clone_destroy,
  316                 .flags = IFC_F_AUTOUNIT,
  317         };
  318         V_lagg_cloner = ifc_attach_cloner(laggname, &req);
  319 }
  320 VNET_SYSINIT(vnet_lagg_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
  321     vnet_lagg_init, NULL);
  322 
  323 static void
  324 vnet_lagg_uninit(const void *unused __unused)
  325 {
  326 
  327         ifc_detach_cloner(V_lagg_cloner);
  328         LAGG_LIST_LOCK_DESTROY();
  329 }
  330 VNET_SYSUNINIT(vnet_lagg_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY,
  331     vnet_lagg_uninit, NULL);
  332 
  333 static int
  334 lagg_modevent(module_t mod, int type, void *data)
  335 {
  336 
  337         switch (type) {
  338         case MOD_LOAD:
  339                 lagg_input_ethernet_p = lagg_input_ethernet;
  340                 lagg_input_infiniband_p = lagg_input_infiniband;
  341                 lagg_linkstate_p = lagg_port_state;
  342                 lagg_detach_cookie = EVENTHANDLER_REGISTER(
  343                     ifnet_departure_event, lagg_port_ifdetach, NULL,
  344                     EVENTHANDLER_PRI_ANY);
  345                 break;
  346         case MOD_UNLOAD:
  347                 EVENTHANDLER_DEREGISTER(ifnet_departure_event,
  348                     lagg_detach_cookie);
  349                 lagg_input_ethernet_p = NULL;
  350                 lagg_input_infiniband_p = NULL;
  351                 lagg_linkstate_p = NULL;
  352                 break;
  353         default:
  354                 return (EOPNOTSUPP);
  355         }
  356         return (0);
  357 }
  358 
  359 static moduledata_t lagg_mod = {
  360         "if_lagg",
  361         lagg_modevent,
  362         0
  363 };
  364 
  365 DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
  366 MODULE_VERSION(if_lagg, 1);
  367 MODULE_DEPEND(if_lagg, if_infiniband, 1, 1, 1);
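/*
 * Usage note: the driver is typically loaded at runtime with
 * `kldload if_lagg' or at boot by setting if_lagg_load="YES" in
 * /boot/loader.conf.
 */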
  368 
  369 static void
  370 lagg_proto_attach(struct lagg_softc *sc, lagg_proto pr)
  371 {
  372 
  373         LAGG_XLOCK_ASSERT(sc);
  374         KASSERT(sc->sc_proto == LAGG_PROTO_NONE, ("%s: sc %p has proto",
  375             __func__, sc));
  376 
  377         if (sc->sc_ifflags & IFF_DEBUG)
  378                 if_printf(sc->sc_ifp, "using proto %u\n", pr);
  379 
  380         if (lagg_protos[pr].pr_attach != NULL)
  381                 lagg_protos[pr].pr_attach(sc);
  382         sc->sc_proto = pr;
  383 }
  384 
  385 static void
  386 lagg_proto_detach(struct lagg_softc *sc)
  387 {
  388         lagg_proto pr;
  389 
  390         LAGG_XLOCK_ASSERT(sc);
  391         pr = sc->sc_proto;
  392         sc->sc_proto = LAGG_PROTO_NONE;
  393 
  394         if (lagg_protos[pr].pr_detach != NULL)
  395                 lagg_protos[pr].pr_detach(sc);
  396 }
  397 
  398 static int
  399 lagg_proto_start(struct lagg_softc *sc, struct mbuf *m)
  400 {
  401 
  402         return (lagg_protos[sc->sc_proto].pr_start(sc, m));
  403 }
  404 
  405 static struct mbuf *
  406 lagg_proto_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
  407 {
  408 
  409         return (lagg_protos[sc->sc_proto].pr_input(sc, lp, m));
  410 }
  411 
  412 static int
  413 lagg_proto_addport(struct lagg_softc *sc, struct lagg_port *lp)
  414 {
  415 
  416         if (lagg_protos[sc->sc_proto].pr_addport == NULL)
  417                 return (0);
  418         else
  419                 return (lagg_protos[sc->sc_proto].pr_addport(lp));
  420 }
  421 
  422 static void
  423 lagg_proto_delport(struct lagg_softc *sc, struct lagg_port *lp)
  424 {
  425 
  426         if (lagg_protos[sc->sc_proto].pr_delport != NULL)
  427                 lagg_protos[sc->sc_proto].pr_delport(lp);
  428 }
  429 
  430 static void
  431 lagg_proto_linkstate(struct lagg_softc *sc, struct lagg_port *lp)
  432 {
  433 
  434         if (lagg_protos[sc->sc_proto].pr_linkstate != NULL)
  435                 lagg_protos[sc->sc_proto].pr_linkstate(lp);
  436 }
  437 
  438 static void
  439 lagg_proto_init(struct lagg_softc *sc)
  440 {
  441 
  442         if (lagg_protos[sc->sc_proto].pr_init != NULL)
  443                 lagg_protos[sc->sc_proto].pr_init(sc);
  444 }
  445 
  446 static void
  447 lagg_proto_stop(struct lagg_softc *sc)
  448 {
  449 
  450         if (lagg_protos[sc->sc_proto].pr_stop != NULL)
  451                 lagg_protos[sc->sc_proto].pr_stop(sc);
  452 }
  453 
  454 static void
  455 lagg_proto_lladdr(struct lagg_softc *sc)
  456 {
  457 
  458         if (lagg_protos[sc->sc_proto].pr_lladdr != NULL)
  459                 lagg_protos[sc->sc_proto].pr_lladdr(sc);
  460 }
  461 
  462 static void
  463 lagg_proto_request(struct lagg_softc *sc, void *v)
  464 {
  465 
  466         if (lagg_protos[sc->sc_proto].pr_request != NULL)
  467                 lagg_protos[sc->sc_proto].pr_request(sc, v);
  468 }
  469 
  470 static void
  471 lagg_proto_portreq(struct lagg_softc *sc, struct lagg_port *lp, void *v)
  472 {
  473 
  474         if (lagg_protos[sc->sc_proto].pr_portreq != NULL)
  475                 lagg_protos[sc->sc_proto].pr_portreq(lp, v);
  476 }
  477 
  478 /*
  479  * This routine is run via a vlan
  480  * config EVENT
  481  */
  482 static void
  483 lagg_register_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
  484 {
  485         struct lagg_softc *sc = ifp->if_softc;
  486         struct lagg_port *lp;
  487 
  488         if (ifp->if_softc !=  arg)   /* Not our event */
  489                 return;
  490 
  491         LAGG_XLOCK(sc);
  492         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
  493                 EVENTHANDLER_INVOKE(vlan_config, lp->lp_ifp, vtag);
  494         LAGG_XUNLOCK(sc);
  495 }
  496 
  497 /*
  498  * This routine is run via a vlan
  499  * unconfig EVENT
  500  */
  501 static void
  502 lagg_unregister_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
  503 {
  504         struct lagg_softc *sc = ifp->if_softc;
  505         struct lagg_port *lp;
  506 
  507         if (ifp->if_softc !=  arg)   /* Not our event */
  508                 return;
  509 
  510         LAGG_XLOCK(sc);
  511         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
  512                 EVENTHANDLER_INVOKE(vlan_unconfig, lp->lp_ifp, vtag);
  513         LAGG_XUNLOCK(sc);
  514 }
  515 
  516 static int
  517 lagg_clone_create(struct if_clone *ifc, char *name, size_t len,
  518     struct ifc_data *ifd, struct ifnet **ifpp)
  519 {
  520         struct iflaggparam iflp;
  521         struct lagg_softc *sc;
  522         struct ifnet *ifp;
  523         int if_type;
  524         int error;
  525         static const uint8_t eaddr[LAGG_ADDR_LEN];
  526 
  527         if (ifd->params != NULL) {
  528                 error = ifc_copyin(ifd, &iflp, sizeof(iflp));
  529                 if (error)
  530                         return (error);
  531 
  532                 switch (iflp.lagg_type) {
  533                 case LAGG_TYPE_ETHERNET:
  534                         if_type = IFT_ETHER;
  535                         break;
  536                 case LAGG_TYPE_INFINIBAND:
  537                         if_type = IFT_INFINIBAND;
  538                         break;
  539                 default:
  540                         return (EINVAL);
  541                 }
  542         } else {
  543                 if_type = IFT_ETHER;
  544         }
  545 
  546         sc = malloc(sizeof(*sc), M_LAGG, M_WAITOK|M_ZERO);
  547         ifp = sc->sc_ifp = if_alloc(if_type);
  548         if (ifp == NULL) {
  549                 free(sc, M_LAGG);
  550                 return (ENOSPC);
  551         }
  552         LAGG_SX_INIT(sc);
  553 
  554         mtx_init(&sc->sc_mtx, "lagg-mtx", NULL, MTX_DEF);
  555         callout_init_mtx(&sc->sc_watchdog, &sc->sc_mtx, 0);
  556 
  557         LAGG_XLOCK(sc);
  558         if (V_def_use_flowid)
  559                 sc->sc_opts |= LAGG_OPT_USE_FLOWID;
  560         if (V_def_use_numa)
  561                 sc->sc_opts |= LAGG_OPT_USE_NUMA;
  562         sc->flowid_shift = V_def_flowid_shift;
  563 
  564         /* Hash all layers by default */
  565         sc->sc_flags = MBUF_HASHFLAG_L2|MBUF_HASHFLAG_L3|MBUF_HASHFLAG_L4;
  566 
  567         lagg_proto_attach(sc, LAGG_PROTO_DEFAULT);
  568 
  569         CK_SLIST_INIT(&sc->sc_ports);
  570 
  571         switch (if_type) {
  572         case IFT_ETHER:
  573                 /* Initialise pseudo media types */
  574                 ifmedia_init(&sc->sc_media, 0, lagg_media_change,
  575                     lagg_media_status);
  576                 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
  577                 ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
  578 
  579                 if_initname(ifp, laggname, ifd->unit);
  580                 ifp->if_transmit = lagg_transmit_ethernet;
  581                 break;
  582         case IFT_INFINIBAND:
  583                 if_initname(ifp, laggname, ifd->unit);
  584                 ifp->if_transmit = lagg_transmit_infiniband;
  585                 break;
  586         default:
  587                 break;
  588         }
  589         ifp->if_softc = sc;
  590         ifp->if_qflush = lagg_qflush;
  591         ifp->if_init = lagg_init;
  592         ifp->if_ioctl = lagg_ioctl;
  593         ifp->if_get_counter = lagg_get_counter;
  594         ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
  595 #if defined(KERN_TLS) || defined(RATELIMIT)
  596         ifp->if_snd_tag_alloc = lagg_snd_tag_alloc;
  597         ifp->if_ratelimit_query = lagg_ratelimit_query;
  598 #endif
  599         ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS;
  600 
  601         /*
  602          * Attach as an ordinary Ethernet device; children will be attached
  603          * as special devices IFT_IEEE8023ADLAG or IFT_INFINIBANDLAG.
  604          */
  605         switch (if_type) {
  606         case IFT_ETHER:
  607                 ether_ifattach(ifp, eaddr);
  608                 break;
  609         case IFT_INFINIBAND:
  610                 infiniband_ifattach(ifp, eaddr, sc->sc_bcast_addr);
  611                 break;
  612         default:
  613                 break;
  614         }
  615 
  616         sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
  617                 lagg_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
  618         sc->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
  619                 lagg_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
  620 
  621         /* Insert into the global list of laggs */
  622         LAGG_LIST_LOCK();
  623         SLIST_INSERT_HEAD(&V_lagg_list, sc, sc_entries);
  624         LAGG_LIST_UNLOCK();
  625         LAGG_XUNLOCK(sc);
  626         *ifpp = ifp;
  627 
  628         return (0);
  629 }
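/*
 * Illustrative userland path into this function: cloning a lagg and
 * attaching ports with ifconfig(8).  The NIC names are examples:
 *
 *	# ifconfig lagg0 create
 *	# ifconfig lagg0 laggproto lacp laggport em0 laggport em1
 *	# ifconfig lagg0 inet 192.0.2.1/24 up
 */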
  630 
  631 static int
  632 lagg_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags)
  633 {
  634         struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
  635         struct lagg_port *lp;
  636 
  637         LAGG_XLOCK(sc);
  638         sc->sc_destroying = 1;
  639         lagg_stop(sc);
  640         ifp->if_flags &= ~IFF_UP;
  641 
  642         EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach);
  643         EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach);
  644 
  645         /* Shutdown and remove lagg ports */
  646         while ((lp = CK_SLIST_FIRST(&sc->sc_ports)) != NULL)
  647                 lagg_port_destroy(lp, 1);
  648 
  649         /* Unhook the aggregation protocol */
  650         lagg_proto_detach(sc);
  651         LAGG_XUNLOCK(sc);
  652 
  653         switch (ifp->if_type) {
  654         case IFT_ETHER:
  655                 ifmedia_removeall(&sc->sc_media);
  656                 ether_ifdetach(ifp);
  657                 break;
  658         case IFT_INFINIBAND:
  659                 infiniband_ifdetach(ifp);
  660                 break;
  661         default:
  662                 break;
  663         }
  664         if_free(ifp);
  665 
  666         LAGG_LIST_LOCK();
  667         SLIST_REMOVE(&V_lagg_list, sc, lagg_softc, sc_entries);
  668         LAGG_LIST_UNLOCK();
  669 
  670         mtx_destroy(&sc->sc_mtx);
  671         LAGG_SX_DESTROY(sc);
  672         free(sc, M_LAGG);
  673 
  674         return (0);
  675 }
  676 
  677 static void
  678 lagg_capabilities(struct lagg_softc *sc)
  679 {
  680         struct lagg_port *lp;
  681         int cap, cap2, ena, ena2, pena, pena2;
  682         uint64_t hwa;
  683         struct ifnet_hw_tsomax hw_tsomax;
  684 
  685         LAGG_XLOCK_ASSERT(sc);
  686 
  687         /* Get common enabled capabilities for the lagg ports */
  688         ena = ena2 = ~0;
  689         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
  690                 ena &= lp->lp_ifp->if_capenable;
  691                 ena2 &= lp->lp_ifp->if_capenable2;
  692         }
  693         if (CK_SLIST_FIRST(&sc->sc_ports) == NULL)
  694                 ena = ena2 = 0;
  695 
  696         /*
  697          * Apply common enabled capabilities back to the lagg ports.
  698          * May require several iterations if they are dependent,
  699          * e.g. TSO cannot remain enabled once TXCSUM is disabled.
  700         do {
  701                 pena = ena;
  702                 pena2 = ena2;
  703                 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
  704                         lagg_setcaps(lp, ena, ena2);
  705                         ena &= lp->lp_ifp->if_capenable;
  706                         ena2 &= lp->lp_ifp->if_capenable2;
  707                 }
  708         } while (pena != ena || pena2 != ena2);
  709 
  710         /* Get other capabilities from the lagg ports */
  711         cap = cap2 = ~0;
  712         hwa = ~(uint64_t)0;
  713         memset(&hw_tsomax, 0, sizeof(hw_tsomax));
  714         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
  715                 cap &= lp->lp_ifp->if_capabilities;
  716                 cap2 &= lp->lp_ifp->if_capabilities2;
  717                 hwa &= lp->lp_ifp->if_hwassist;
  718                 if_hw_tsomax_common(lp->lp_ifp, &hw_tsomax);
  719         }
  720         if (CK_SLIST_FIRST(&sc->sc_ports) == NULL)
  721                 cap = cap2 = hwa = 0;
  722 
  723         if (sc->sc_ifp->if_capabilities != cap ||
  724             sc->sc_ifp->if_capenable != ena ||
  725             sc->sc_ifp->if_capenable2 != ena2 ||
  726             sc->sc_ifp->if_hwassist != hwa ||
  727             if_hw_tsomax_update(sc->sc_ifp, &hw_tsomax) != 0) {
  728                 sc->sc_ifp->if_capabilities = cap;
  729                 sc->sc_ifp->if_capabilities2 = cap2;
  730                 sc->sc_ifp->if_capenable = ena;
  731                 sc->sc_ifp->if_capenable2 = ena2;
  732                 sc->sc_ifp->if_hwassist = hwa;
  733                 getmicrotime(&sc->sc_ifp->if_lastchange);
  734 
  735                 if (sc->sc_ifflags & IFF_DEBUG)
  736                         if_printf(sc->sc_ifp,
  737                             "capabilities 0x%08x enabled 0x%08x\n", cap, ena);
  738         }
  739 }
  740 
  741 static int
  742 lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
  743 {
  744         struct lagg_softc *sc_ptr;
  745         struct lagg_port *lp, *tlp;
  746         struct ifreq ifr;
  747         int error, i, oldmtu;
  748         int if_type;
  749         uint64_t *pval;
  750 
  751         LAGG_XLOCK_ASSERT(sc);
  752 
  753         if (sc->sc_ifp == ifp) {
  754                 if_printf(sc->sc_ifp,
  755                     "cannot add a lagg to itself as a port\n");
  756                 return (EINVAL);
  757         }
  758 
  759         if (sc->sc_destroying == 1)
  760                 return (ENXIO);
  761 
  762         /* Limit the maximum number of lagg ports */
  763         if (sc->sc_count >= LAGG_MAX_PORTS)
  764                 return (ENOSPC);
  765 
  766         /* Check if the port has already been associated with a lagg */
  767         if (ifp->if_lagg != NULL) {
  768                 /* Port is already in the current lagg? */
  769                 lp = (struct lagg_port *)ifp->if_lagg;
  770                 if (lp->lp_softc == sc)
  771                         return (EEXIST);
  772                 return (EBUSY);
  773         }
  774 
  775         switch (sc->sc_ifp->if_type) {
  776         case IFT_ETHER:
  777                 /* XXX Disallow non-Ethernet interfaces (this should allow any IEEE 802 type) */
  778                 if (ifp->if_type != IFT_ETHER && ifp->if_type != IFT_L2VLAN)
  779                         return (EPROTONOSUPPORT);
  780                 if_type = IFT_IEEE8023ADLAG;
  781                 break;
  782         case IFT_INFINIBAND:
  783                 /* XXX Disallow non-infiniband interfaces */
  784                 if (ifp->if_type != IFT_INFINIBAND)
  785                         return (EPROTONOSUPPORT);
  786                 if_type = IFT_INFINIBANDLAG;
  787                 break;
  788         default:
  789                 break;
  790         }
  791 
  792         /* Allow the first Ethernet member to define the MTU */
  793         oldmtu = -1;
  794         if (CK_SLIST_EMPTY(&sc->sc_ports)) {
  795                 sc->sc_ifp->if_mtu = ifp->if_mtu;
  796         } else if (sc->sc_ifp->if_mtu != ifp->if_mtu) {
  797                 if (ifp->if_ioctl == NULL) {
  798                         if_printf(sc->sc_ifp, "cannot change MTU for %s\n",
  799                             ifp->if_xname);
  800                         return (EINVAL);
  801                 }
  802                 oldmtu = ifp->if_mtu;
  803                 strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name));
  804                 ifr.ifr_mtu = sc->sc_ifp->if_mtu;
  805                 error = (*ifp->if_ioctl)(ifp, SIOCSIFMTU, (caddr_t)&ifr);
  806                 if (error != 0) {
  807                         if_printf(sc->sc_ifp, "invalid MTU for %s\n",
  808                             ifp->if_xname);
  809                         return (error);
  810                 }
  811                 ifr.ifr_mtu = oldmtu;
  812         }
  813 
  814         lp = malloc(sizeof(struct lagg_port), M_LAGG, M_WAITOK|M_ZERO);
  815         lp->lp_softc = sc;
  816 
  817         /* Check if port is a stacked lagg */
  818         LAGG_LIST_LOCK();
  819         SLIST_FOREACH(sc_ptr, &V_lagg_list, sc_entries) {
  820                 if (ifp == sc_ptr->sc_ifp) {
  821                         LAGG_LIST_UNLOCK();
  822                         free(lp, M_LAGG);
  823                         if (oldmtu != -1)
  824                                 (*ifp->if_ioctl)(ifp, SIOCSIFMTU,
  825                                     (caddr_t)&ifr);
  826                         return (EINVAL);
  827                         /* XXX disable stacking for the moment, it's untested */
  828 #ifdef LAGG_PORT_STACKING
  829                         lp->lp_flags |= LAGG_PORT_STACK;
  830                         if (lagg_port_checkstacking(sc_ptr) >=
  831                             LAGG_MAX_STACKING) {
  832                                 LAGG_LIST_UNLOCK();
  833                                 free(lp, M_LAGG);
  834                                 if (oldmtu != -1)
  835                                         (*ifp->if_ioctl)(ifp, SIOCSIFMTU,
  836                                             (caddr_t)&ifr);
  837                                 return (E2BIG);
  838                         }
  839 #endif
  840                 }
  841         }
  842         LAGG_LIST_UNLOCK();
  843 
  844         if_ref(ifp);
  845         lp->lp_ifp = ifp;
  846 
  847         bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ifp->if_addrlen);
  848         lp->lp_ifcapenable = ifp->if_capenable;
  849         if (CK_SLIST_EMPTY(&sc->sc_ports)) {
  850                 bcopy(IF_LLADDR(ifp), IF_LLADDR(sc->sc_ifp), ifp->if_addrlen);
  851                 lagg_proto_lladdr(sc);
  852                 EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
  853         } else {
  854                 if_setlladdr(ifp, IF_LLADDR(sc->sc_ifp), ifp->if_addrlen);
  855         }
  856         lagg_setflags(lp, 1);
  857 
  858         if (CK_SLIST_EMPTY(&sc->sc_ports))
  859                 sc->sc_primary = lp;
  860 
  861         /* Change the interface type */
  862         lp->lp_iftype = ifp->if_type;
  863         ifp->if_type = if_type;
  864         ifp->if_lagg = lp;
  865         lp->lp_ioctl = ifp->if_ioctl;
  866         ifp->if_ioctl = lagg_port_ioctl;
  867         lp->lp_output = ifp->if_output;
  868         ifp->if_output = lagg_port_output;
  869 
  870         /* Read port counters */
  871         pval = lp->port_counters.val;
  872         for (i = 0; i < IFCOUNTERS; i++, pval++)
  873                 *pval = ifp->if_get_counter(ifp, i);
  874 
  875         /*
  876          * Insert into the list of ports.
  877          * Keep ports sorted by if_index so that configuration is
  878          * predictable and the same `ifconfig laggN create ...` command
  879          * leads to the same result each time.
  880          */
  881         CK_SLIST_FOREACH(tlp, &sc->sc_ports, lp_entries) {
  882                 if (tlp->lp_ifp->if_index < ifp->if_index && (
  883                     CK_SLIST_NEXT(tlp, lp_entries) == NULL ||
  884                     ((struct  lagg_port*)CK_SLIST_NEXT(tlp, lp_entries))->lp_ifp->if_index >
  885                     ifp->if_index))
  886                         break;
  887         }
  888         if (tlp != NULL)
  889                 CK_SLIST_INSERT_AFTER(tlp, lp, lp_entries);
  890         else
  891                 CK_SLIST_INSERT_HEAD(&sc->sc_ports, lp, lp_entries);
  892         sc->sc_count++;
  893 
  894         lagg_setmulti(lp);
  895 
  896         if ((error = lagg_proto_addport(sc, lp)) != 0) {
  897                 /* Remove the port, without calling pr_delport. */
  898                 lagg_port_destroy(lp, 0);
  899                 if (oldmtu != -1)
  900                         (*ifp->if_ioctl)(ifp, SIOCSIFMTU, (caddr_t)&ifr);
  901                 return (error);
  902         }
  903 
  904         /* Update lagg capabilities */
  905         lagg_capabilities(sc);
  906         lagg_linkstate(sc);
  907 
  908         return (0);
  909 }
  910 
  911 #ifdef LAGG_PORT_STACKING
  912 static int
  913 lagg_port_checkstacking(struct lagg_softc *sc)
  914 {
  915         struct lagg_softc *sc_ptr;
  916         struct lagg_port *lp;
  917         int m = 0;
  918 
  919         LAGG_SXLOCK_ASSERT(sc);
  920         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
  921                 if (lp->lp_flags & LAGG_PORT_STACK) {
  922                         sc_ptr = (struct lagg_softc *)lp->lp_ifp->if_softc;
  923                         m = MAX(m, lagg_port_checkstacking(sc_ptr));
  924                 }
  925         }
  926 
  927         return (m + 1);
  928 }
  929 #endif
  930 
  931 static void
  932 lagg_port_destroy_cb(epoch_context_t ec)
  933 {
  934         struct lagg_port *lp;
  935         struct ifnet *ifp;
  936 
  937         lp = __containerof(ec, struct lagg_port, lp_epoch_ctx);
  938         ifp = lp->lp_ifp;
  939 
  940         if_rele(ifp);
  941         free(lp, M_LAGG);
  942 }
  943 
  944 static int
  945 lagg_port_destroy(struct lagg_port *lp, int rundelport)
  946 {
  947         struct lagg_softc *sc = lp->lp_softc;
  948         struct lagg_port *lp_ptr, *lp0;
  949         struct ifnet *ifp = lp->lp_ifp;
  950         uint64_t *pval, vdiff;
  951         int i;
  952 
  953         LAGG_XLOCK_ASSERT(sc);
  954 
  955         if (rundelport)
  956                 lagg_proto_delport(sc, lp);
  957 
  958         if (lp->lp_detaching == 0)
  959                 lagg_clrmulti(lp);
  960 
  961         /* Restore interface */
  962         ifp->if_type = lp->lp_iftype;
  963         ifp->if_ioctl = lp->lp_ioctl;
  964         ifp->if_output = lp->lp_output;
  965         ifp->if_lagg = NULL;
  966 
  967         /* Update detached port counters */
  968         pval = lp->port_counters.val;
  969         for (i = 0; i < IFCOUNTERS; i++, pval++) {
  970                 vdiff = ifp->if_get_counter(ifp, i) - *pval;
  971                 sc->detached_counters.val[i] += vdiff;
  972         }
  973 
  974         /* Finally, remove the port from the lagg */
  975         CK_SLIST_REMOVE(&sc->sc_ports, lp, lagg_port, lp_entries);
  976         sc->sc_count--;
  977 
  978         /* Update the primary interface */
  979         if (lp == sc->sc_primary) {
  980                 uint8_t lladdr[LAGG_ADDR_LEN];
  981 
  982                 if ((lp0 = CK_SLIST_FIRST(&sc->sc_ports)) == NULL)
  983                         bzero(&lladdr, LAGG_ADDR_LEN);
  984                 else
  985                         bcopy(lp0->lp_lladdr, lladdr, LAGG_ADDR_LEN);
  986                 sc->sc_primary = lp0;
  987                 if (sc->sc_destroying == 0) {
  988                         bcopy(lladdr, IF_LLADDR(sc->sc_ifp), sc->sc_ifp->if_addrlen);
  989                         lagg_proto_lladdr(sc);
  990                         EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
  991 
  992                         /*
  993                          * Update lladdr for each port (new primary needs update
  994                          * as well, to switch from old lladdr to its 'real' one).
  995                          * We can skip this if the lagg is being destroyed.
  996                          */
  997                         CK_SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries)
  998                                 if_setlladdr(lp_ptr->lp_ifp, lladdr,
  999                                     lp_ptr->lp_ifp->if_addrlen);
 1000                 }
 1001         }
 1002 
 1003         if (lp->lp_ifflags)
 1004                 if_printf(ifp, "%s: lp_ifflags unclean\n", __func__);
 1005 
 1006         if (lp->lp_detaching == 0) {
 1007                 lagg_setflags(lp, 0);
 1008                 lagg_setcaps(lp, lp->lp_ifcapenable, lp->lp_ifcapenable2);
 1009                 if_setlladdr(ifp, lp->lp_lladdr, ifp->if_addrlen);
 1010         }
 1011 
 1012         /*
 1013          * Free the port and release its ifnet reference after a grace
 1014          * period has elapsed.
 1015          */
 1016         NET_EPOCH_CALL(lagg_port_destroy_cb, &lp->lp_epoch_ctx);
 1017         /* Update lagg capabilities */
 1018         lagg_capabilities(sc);
 1019         lagg_linkstate(sc);
 1020 
 1021         return (0);
 1022 }
 1023 
 1024 static int
 1025 lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 1026 {
 1027         struct epoch_tracker et;
 1028         struct lagg_reqport *rp = (struct lagg_reqport *)data;
 1029         struct lagg_softc *sc;
 1030         struct lagg_port *lp = NULL;
 1031         int error = 0;
 1032 
 1033         /* Should be checked by the caller */
 1034         switch (ifp->if_type) {
 1035         case IFT_IEEE8023ADLAG:
 1036         case IFT_INFINIBANDLAG:
 1037                 if ((lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL)
 1038                         goto fallback;
 1039                 break;
 1040         default:
 1041                 goto fallback;
 1042         }
 1043 
 1044         switch (cmd) {
 1045         case SIOCGLAGGPORT:
 1046                 if (rp->rp_portname[0] == '\0' ||
 1047                     ifunit(rp->rp_portname) != ifp) {
 1048                         error = EINVAL;
 1049                         break;
 1050                 }
 1051 
 1052                 NET_EPOCH_ENTER(et);
 1053                 if ((lp = ifp->if_lagg) == NULL || lp->lp_softc != sc) {
 1054                         error = ENOENT;
 1055                         NET_EPOCH_EXIT(et);
 1056                         break;
 1057                 }
 1058 
 1059                 lagg_port2req(lp, rp);
 1060                 NET_EPOCH_EXIT(et);
 1061                 break;
 1062 
 1063         case SIOCSIFCAP:
 1064         case SIOCSIFCAPNV:
 1065                 if (lp->lp_ioctl == NULL) {
 1066                         error = EINVAL;
 1067                         break;
 1068                 }
 1069                 error = (*lp->lp_ioctl)(ifp, cmd, data);
 1070                 if (error)
 1071                         break;
 1072 
 1073                 /* Update lagg interface capabilities */
 1074                 LAGG_XLOCK(sc);
 1075                 lagg_capabilities(sc);
 1076                 LAGG_XUNLOCK(sc);
 1077                 VLAN_CAPABILITIES(sc->sc_ifp);
 1078                 break;
 1079 
 1080         case SIOCSIFMTU:
 1081                 /* Do not allow the MTU to be changed once joined */
 1082                 error = EINVAL;
 1083                 break;
 1084 
 1085         default:
 1086                 goto fallback;
 1087         }
 1088 
 1089         return (error);
 1090 
 1091 fallback:
 1092         if (lp != NULL && lp->lp_ioctl != NULL)
 1093                 return ((*lp->lp_ioctl)(ifp, cmd, data));
 1094 
 1095         return (EINVAL);
 1096 }
 1097 
 1098 /*
 1099  * Requests counter @cnt data.
 1100  *
 1101  * The counter value is calculated the following way:
 1102  * 1) for each port, sum the difference between the current and
 1103  *    "initial" measurements;
 1104  * 2) add the lagg logical interface counters;
 1105  * 3) add the data from the detached_counters array.
 1106  *
 1107  * We also do the following on port attach/detach:
 1108  * 1) on attach, store the port's current counters in port_counters;
 1109  * 2) on detach, add the attach-to-detach delta to detached_counters.
 1110  */
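/*
 * Worked example: a port whose if_get_counter() read 1000 at attach
 * time and reads 1500 now contributes 500; if a previously detached
 * port had contributed 300 (recorded in detached_counters) and the
 * logical interface itself added nothing, lagg_get_counter() returns
 * 800 for that counter.
 */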
 1111 static uint64_t
 1112 lagg_get_counter(struct ifnet *ifp, ift_counter cnt)
 1113 {
 1114         struct epoch_tracker et;
 1115         struct lagg_softc *sc;
 1116         struct lagg_port *lp;
 1117         struct ifnet *lpifp;
 1118         uint64_t newval, oldval, vsum;
 1119 
 1120         /* Revise this when we've got non-generic counters. */
 1121         KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
 1122 
 1123         sc = (struct lagg_softc *)ifp->if_softc;
 1124 
 1125         vsum = 0;
 1126         NET_EPOCH_ENTER(et);
 1127         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1128                 /* Saved attached value */
 1129                 oldval = lp->port_counters.val[cnt];
 1130                 /* current value */
 1131                 lpifp = lp->lp_ifp;
 1132                 newval = lpifp->if_get_counter(lpifp, cnt);
 1133                 /* Calculate diff and save new */
 1134                 vsum += newval - oldval;
 1135         }
 1136         NET_EPOCH_EXIT(et);
 1137 
 1138         /*
 1139          * Add counter data which might be added by upper
 1140          * layer protocols operating on logical interface.
 1141          */
 1142         vsum += if_get_counter_default(ifp, cnt);
 1143 
 1144         /*
 1145          * Add counter data from detached ports counters
 1146          */
 1147         vsum += sc->detached_counters.val[cnt];
 1148 
 1149         return (vsum);
 1150 }
 1151 
 1152 /*
 1153  * For direct output to child ports, e.g. raw frames written via bpf(4).
 1154  */
 1155 static int
 1156 lagg_port_output(struct ifnet *ifp, struct mbuf *m,
 1157         const struct sockaddr *dst, struct route *ro)
 1158 {
 1159         struct lagg_port *lp = ifp->if_lagg;
 1160 
 1161         switch (dst->sa_family) {
 1162                 case pseudo_AF_HDRCMPLT:
 1163                 case AF_UNSPEC:
 1164                         if (lp != NULL)
 1165                                 return ((*lp->lp_output)(ifp, m, dst, ro));
 1166         }
 1167 
 1168         /* drop any other frames */
 1169         m_freem(m);
 1170         return (ENETDOWN);
 1171 }
 1172 
 1173 static void
 1174 lagg_port_ifdetach(void *arg __unused, struct ifnet *ifp)
 1175 {
 1176         struct lagg_port *lp;
 1177         struct lagg_softc *sc;
 1178 
 1179         if ((lp = ifp->if_lagg) == NULL)
 1180                 return;
 1181         /* If the ifnet is just being renamed, don't do anything. */
 1182         if (ifp->if_flags & IFF_RENAMING)
 1183                 return;
 1184 
 1185         sc = lp->lp_softc;
 1186 
 1187         LAGG_XLOCK(sc);
 1188         lp->lp_detaching = 1;
 1189         lagg_port_destroy(lp, 1);
 1190         LAGG_XUNLOCK(sc);
 1191         VLAN_CAPABILITIES(sc->sc_ifp);
 1192 }
 1193 
 1194 static void
 1195 lagg_port2req(struct lagg_port *lp, struct lagg_reqport *rp)
 1196 {
 1197         struct lagg_softc *sc = lp->lp_softc;
 1198 
 1199         strlcpy(rp->rp_ifname, sc->sc_ifname, sizeof(rp->rp_ifname));
 1200         strlcpy(rp->rp_portname, lp->lp_ifp->if_xname, sizeof(rp->rp_portname));
 1201         rp->rp_prio = lp->lp_prio;
 1202         rp->rp_flags = lp->lp_flags;
 1203         lagg_proto_portreq(sc, lp, &rp->rp_psc);
 1204 
 1205         /* Add protocol specific flags */
 1206         switch (sc->sc_proto) {
 1207                 case LAGG_PROTO_FAILOVER:
 1208                         if (lp == sc->sc_primary)
 1209                                 rp->rp_flags |= LAGG_PORT_MASTER;
 1210                         if (lp == lagg_link_active(sc, sc->sc_primary))
 1211                                 rp->rp_flags |= LAGG_PORT_ACTIVE;
 1212                         break;
 1213 
 1214                 case LAGG_PROTO_ROUNDROBIN:
 1215                 case LAGG_PROTO_LOADBALANCE:
 1216                 case LAGG_PROTO_BROADCAST:
 1217                         if (LAGG_PORTACTIVE(lp))
 1218                                 rp->rp_flags |= LAGG_PORT_ACTIVE;
 1219                         break;
 1220 
 1221                 case LAGG_PROTO_LACP:
 1222                         /* LACP has a different definition of active */
 1223                         if (lacp_isactive(lp))
 1224                                 rp->rp_flags |= LAGG_PORT_ACTIVE;
 1225                         if (lacp_iscollecting(lp))
 1226                                 rp->rp_flags |= LAGG_PORT_COLLECTING;
 1227                         if (lacp_isdistributing(lp))
 1228                                 rp->rp_flags |= LAGG_PORT_DISTRIBUTING;
 1229                         break;
 1230         }
 1231 
 1232 }
 1233 
 1234 static void
 1235 lagg_watchdog_infiniband(void *arg)
 1236 {
 1237         struct epoch_tracker et;
 1238         struct lagg_softc *sc;
 1239         struct lagg_port *lp;
 1240         struct ifnet *ifp;
 1241         struct ifnet *lp_ifp;
 1242 
 1243         sc = arg;
 1244 
 1245         /*
 1246          * Because infiniband nodes have a fixed MAC address, which is
 1247          * generated by the so-called GID, we need to regularly update
 1248          * the link level address of the parent lagg<N> device when
 1249          * the active port changes. Possibly we could piggy-back on
 1250          * link up/down events as well, but using a timer also provides
 1251          * a guarantee against too frequent events. This operation
 1252          * does not have to be atomic.
 1253          */
 1254         NET_EPOCH_ENTER(et);
 1255         lp = lagg_link_active(sc, sc->sc_primary);
 1256         if (lp != NULL) {
 1257                 ifp = sc->sc_ifp;
 1258                 lp_ifp = lp->lp_ifp;
 1259 
 1260                 if (ifp != NULL && lp_ifp != NULL &&
 1261                     (memcmp(IF_LLADDR(ifp), IF_LLADDR(lp_ifp), ifp->if_addrlen) != 0 ||
 1262                      memcmp(sc->sc_bcast_addr, lp_ifp->if_broadcastaddr, ifp->if_addrlen) != 0)) {
 1263                         memcpy(IF_LLADDR(ifp), IF_LLADDR(lp_ifp), ifp->if_addrlen);
 1264                         memcpy(sc->sc_bcast_addr, lp_ifp->if_broadcastaddr, ifp->if_addrlen);
 1265 
 1266                         CURVNET_SET(ifp->if_vnet);
 1267                         EVENTHANDLER_INVOKE(iflladdr_event, ifp);
 1268                         CURVNET_RESTORE();
 1269                 }
 1270         }
 1271         NET_EPOCH_EXIT(et);
 1272 
 1273         callout_reset(&sc->sc_watchdog, hz, &lagg_watchdog_infiniband, arg);
 1274 }
 1275 
 1276 static void
 1277 lagg_init(void *xsc)
 1278 {
 1279         struct lagg_softc *sc = (struct lagg_softc *)xsc;
 1280         struct ifnet *ifp = sc->sc_ifp;
 1281         struct lagg_port *lp;
 1282 
 1283         LAGG_XLOCK(sc);
 1284         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 1285                 LAGG_XUNLOCK(sc);
 1286                 return;
 1287         }
 1288 
 1289         ifp->if_drv_flags |= IFF_DRV_RUNNING;
 1290 
 1291         /*
 1292          * Update the port lladdrs if needed.
 1293          * This might be an if_setlladdr() notification
 1294          * that the lladdr has been changed.
 1295          */
 1296         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1297                 if (memcmp(IF_LLADDR(ifp), IF_LLADDR(lp->lp_ifp),
 1298                     ifp->if_addrlen) != 0)
 1299                         if_setlladdr(lp->lp_ifp, IF_LLADDR(ifp), ifp->if_addrlen);
 1300         }
 1301 
 1302         lagg_proto_init(sc);
 1303 
 1304         if (ifp->if_type == IFT_INFINIBAND) {
 1305                 mtx_lock(&sc->sc_mtx);
 1306                 lagg_watchdog_infiniband(sc);
 1307                 mtx_unlock(&sc->sc_mtx);
 1308         }
 1309 
 1310         LAGG_XUNLOCK(sc);
 1311 }
 1312 
 1313 static void
 1314 lagg_stop(struct lagg_softc *sc)
 1315 {
 1316         struct ifnet *ifp = sc->sc_ifp;
 1317 
 1318         LAGG_XLOCK_ASSERT(sc);
 1319 
 1320         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 1321                 return;
 1322 
 1323         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 1324 
 1325         lagg_proto_stop(sc);
 1326 
 1327         mtx_lock(&sc->sc_mtx);
 1328         callout_stop(&sc->sc_watchdog);
 1329         mtx_unlock(&sc->sc_mtx);
 1330 
 1331         callout_drain(&sc->sc_watchdog);
 1332 }
 1333 
 1334 static int
 1335 lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 1336 {
 1337         struct epoch_tracker et;
 1338         struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 1339         struct lagg_reqall *ra = (struct lagg_reqall *)data;
 1340         struct lagg_reqopts *ro = (struct lagg_reqopts *)data;
 1341         struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf;
 1342         struct lagg_reqflags *rf = (struct lagg_reqflags *)data;
 1343         struct ifreq *ifr = (struct ifreq *)data;
 1344         struct lagg_port *lp;
 1345         struct ifnet *tpif;
 1346         struct thread *td = curthread;
 1347         char *buf, *outbuf;
 1348         int count, buflen, len, error = 0, oldmtu;
 1349 
 1350         bzero(&rpbuf, sizeof(rpbuf));
 1351 
 1352         /* XXX: This can race with lagg_clone_destroy. */
 1353 
 1354         switch (cmd) {
 1355         case SIOCGLAGG:
 1356                 LAGG_XLOCK(sc);
 1357                 buflen = sc->sc_count * sizeof(struct lagg_reqport);
 1358                 outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
 1359                 ra->ra_proto = sc->sc_proto;
 1360                 lagg_proto_request(sc, &ra->ra_psc);
 1361                 count = 0;
 1362                 buf = outbuf;
 1363                 len = min(ra->ra_size, buflen);
 1364                 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1365                         if (len < sizeof(rpbuf))
 1366                                 break;
 1367 
 1368                         lagg_port2req(lp, &rpbuf);
 1369                         memcpy(buf, &rpbuf, sizeof(rpbuf));
 1370                         count++;
 1371                         buf += sizeof(rpbuf);
 1372                         len -= sizeof(rpbuf);
 1373                 }
 1374                 LAGG_XUNLOCK(sc);
 1375                 ra->ra_ports = count;
 1376                 ra->ra_size = count * sizeof(rpbuf);
 1377                 error = copyout(outbuf, ra->ra_port, ra->ra_size);
 1378                 free(outbuf, M_TEMP);
 1379                 break;
 1380         case SIOCSLAGG:
 1381                 error = priv_check(td, PRIV_NET_LAGG);
 1382                 if (error)
 1383                         break;
 1384                 if (ra->ra_proto >= LAGG_PROTO_MAX) {
 1385                         error = EPROTONOSUPPORT;
 1386                         break;
 1387                 }
 1388                 /* Infiniband only supports the failover protocol. */
 1389                 if (ra->ra_proto != LAGG_PROTO_FAILOVER &&
 1390                     ifp->if_type == IFT_INFINIBAND) {
 1391                         error = EPROTONOSUPPORT;
 1392                         break;
 1393                 }
 1394                 LAGG_XLOCK(sc);
 1395                 lagg_proto_detach(sc);
 1396                 lagg_proto_attach(sc, ra->ra_proto);
 1397                 LAGG_XUNLOCK(sc);
 1398                 break;
 1399         case SIOCGLAGGOPTS:
 1400                 LAGG_XLOCK(sc);
 1401                 ro->ro_opts = sc->sc_opts;
 1402                 if (sc->sc_proto == LAGG_PROTO_LACP) {
 1403                         struct lacp_softc *lsc;
 1404 
 1405                         lsc = (struct lacp_softc *)sc->sc_psc;
 1406                         if (lsc->lsc_debug.lsc_tx_test != 0)
 1407                                 ro->ro_opts |= LAGG_OPT_LACP_TXTEST;
 1408                         if (lsc->lsc_debug.lsc_rx_test != 0)
 1409                                 ro->ro_opts |= LAGG_OPT_LACP_RXTEST;
 1410                         if (lsc->lsc_strict_mode != 0)
 1411                                 ro->ro_opts |= LAGG_OPT_LACP_STRICT;
 1412                         if (lsc->lsc_fast_timeout != 0)
 1413                                 ro->ro_opts |= LAGG_OPT_LACP_FAST_TIMO;
 1414 
 1415                         ro->ro_active = sc->sc_active;
 1416                 } else {
 1417                         ro->ro_active = 0;
 1418                         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 1419                                 ro->ro_active += LAGG_PORTACTIVE(lp);
 1420                 }
 1421                 ro->ro_bkt = sc->sc_stride;
 1422                 ro->ro_flapping = sc->sc_flapping;
 1423                 ro->ro_flowid_shift = sc->flowid_shift;
 1424                 LAGG_XUNLOCK(sc);
 1425                 break;
 1426         case SIOCSLAGGOPTS:
 1427                 error = priv_check(td, PRIV_NET_LAGG);
 1428                 if (error)
 1429                         break;
 1430 
 1431                 /*
 1432                  * The stride option was added without defining a corresponding
 1433                  * LAGG_OPT flag, so handle a non-zero value before checking
 1434                  * anything else to preserve compatibility.
 1435                  */
 1436                 LAGG_XLOCK(sc);
 1437                 if (ro->ro_opts == 0 && ro->ro_bkt != 0) {
 1438                         if (sc->sc_proto != LAGG_PROTO_ROUNDROBIN) {
 1439                                 LAGG_XUNLOCK(sc);
 1440                                 error = EINVAL;
 1441                                 break;
 1442                         }
 1443                         sc->sc_stride = ro->ro_bkt;
 1444                 }
 1445                 if (ro->ro_opts == 0) {
 1446                         LAGG_XUNLOCK(sc);
 1447                         break;
 1448                 }
 1449 
 1450                 /*
 1451                  * Set options.  LACP options are stored in sc->sc_psc,
 1452                  * not in sc_opts.
 1453                  */
 1454                 int valid, lacp;
 1455 
 1456                 switch (ro->ro_opts) {
 1457                 case LAGG_OPT_USE_FLOWID:
 1458                 case -LAGG_OPT_USE_FLOWID:
 1459                 case LAGG_OPT_USE_NUMA:
 1460                 case -LAGG_OPT_USE_NUMA:
 1461                 case LAGG_OPT_FLOWIDSHIFT:
 1462                 case LAGG_OPT_RR_LIMIT:
 1463                         valid = 1;
 1464                         lacp = 0;
 1465                         break;
 1466                 case LAGG_OPT_LACP_TXTEST:
 1467                 case -LAGG_OPT_LACP_TXTEST:
 1468                 case LAGG_OPT_LACP_RXTEST:
 1469                 case -LAGG_OPT_LACP_RXTEST:
 1470                 case LAGG_OPT_LACP_STRICT:
 1471                 case -LAGG_OPT_LACP_STRICT:
 1472                 case LAGG_OPT_LACP_FAST_TIMO:
 1473                 case -LAGG_OPT_LACP_FAST_TIMO:
 1474                         valid = lacp = 1;
 1475                         break;
 1476                 default:
 1477                         valid = lacp = 0;
 1478                         break;
 1479                 }
 1480 
 1481                 if (valid == 0 ||
 1482                     (lacp == 1 && sc->sc_proto != LAGG_PROTO_LACP)) {
 1483                         /* Invalid combination of options specified. */
 1484                         error = EINVAL;
 1485                         LAGG_XUNLOCK(sc);
 1486                         break;  /* Return from SIOCSLAGGOPTS. */
 1487                 }
 1488 
 1489                 /*
 1490                  * Store new options into sc->sc_opts except for
 1491                  * FLOWIDSHIFT, RR and LACP options.
 1492                  */
 1493                 if (lacp == 0) {
 1494                         if (ro->ro_opts == LAGG_OPT_FLOWIDSHIFT)
 1495                                 sc->flowid_shift = ro->ro_flowid_shift;
 1496                         else if (ro->ro_opts == LAGG_OPT_RR_LIMIT) {
 1497                                 if (sc->sc_proto != LAGG_PROTO_ROUNDROBIN ||
 1498                                     ro->ro_bkt == 0) {
 1499                                         error = EINVAL;
 1500                                         LAGG_XUNLOCK(sc);
 1501                                         break;
 1502                                 }
 1503                                 sc->sc_stride = ro->ro_bkt;
 1504                         } else if (ro->ro_opts > 0)
 1505                                 sc->sc_opts |= ro->ro_opts;
 1506                         else
 1507                                 sc->sc_opts &= ~ro->ro_opts;
 1508                 } else {
 1509                         struct lacp_softc *lsc;
 1510                         struct lacp_port *lp;
 1511 
 1512                         lsc = (struct lacp_softc *)sc->sc_psc;
 1513 
 1514                         switch (ro->ro_opts) {
 1515                         case LAGG_OPT_LACP_TXTEST:
 1516                                 lsc->lsc_debug.lsc_tx_test = 1;
 1517                                 break;
 1518                         case -LAGG_OPT_LACP_TXTEST:
 1519                                 lsc->lsc_debug.lsc_tx_test = 0;
 1520                                 break;
 1521                         case LAGG_OPT_LACP_RXTEST:
 1522                                 lsc->lsc_debug.lsc_rx_test = 1;
 1523                                 break;
 1524                         case -LAGG_OPT_LACP_RXTEST:
 1525                                 lsc->lsc_debug.lsc_rx_test = 0;
 1526                                 break;
 1527                         case LAGG_OPT_LACP_STRICT:
 1528                                 lsc->lsc_strict_mode = 1;
 1529                                 break;
 1530                         case -LAGG_OPT_LACP_STRICT:
 1531                                 lsc->lsc_strict_mode = 0;
 1532                                 break;
 1533                         case LAGG_OPT_LACP_FAST_TIMO:
 1534                                 LACP_LOCK(lsc);
 1535                                 LIST_FOREACH(lp, &lsc->lsc_ports, lp_next)
 1536                                         lp->lp_state |= LACP_STATE_TIMEOUT;
 1537                                 LACP_UNLOCK(lsc);
 1538                                 lsc->lsc_fast_timeout = 1;
 1539                                 break;
 1540                         case -LAGG_OPT_LACP_FAST_TIMO:
 1541                                 LACP_LOCK(lsc);
 1542                                 LIST_FOREACH(lp, &lsc->lsc_ports, lp_next)
 1543                                         lp->lp_state &= ~LACP_STATE_TIMEOUT;
 1544                                 LACP_UNLOCK(lsc);
 1545                                 lsc->lsc_fast_timeout = 0;
 1546                                 break;
 1547                         }
 1548                 }
 1549                 LAGG_XUNLOCK(sc);
 1550                 break;
 1551         case SIOCGLAGGFLAGS:
 1552                 rf->rf_flags = 0;
 1553                 LAGG_XLOCK(sc);
 1554                 if (sc->sc_flags & MBUF_HASHFLAG_L2)
 1555                         rf->rf_flags |= LAGG_F_HASHL2;
 1556                 if (sc->sc_flags & MBUF_HASHFLAG_L3)
 1557                         rf->rf_flags |= LAGG_F_HASHL3;
 1558                 if (sc->sc_flags & MBUF_HASHFLAG_L4)
 1559                         rf->rf_flags |= LAGG_F_HASHL4;
 1560                 LAGG_XUNLOCK(sc);
 1561                 break;
 1562         case SIOCSLAGGHASH:
 1563                 error = priv_check(td, PRIV_NET_LAGG);
 1564                 if (error)
 1565                         break;
 1566                 if ((rf->rf_flags & LAGG_F_HASHMASK) == 0) {
 1567                         error = EINVAL;
 1568                         break;
 1569                 }
 1570                 LAGG_XLOCK(sc);
 1571                 sc->sc_flags = 0;
 1572                 if (rf->rf_flags & LAGG_F_HASHL2)
 1573                         sc->sc_flags |= MBUF_HASHFLAG_L2;
 1574                 if (rf->rf_flags & LAGG_F_HASHL3)
 1575                         sc->sc_flags |= MBUF_HASHFLAG_L3;
 1576                 if (rf->rf_flags & LAGG_F_HASHL4)
 1577                         sc->sc_flags |= MBUF_HASHFLAG_L4;
 1578                 LAGG_XUNLOCK(sc);
 1579                 break;
 1580         case SIOCGLAGGPORT:
 1581                 if (rp->rp_portname[0] == '\0' ||
 1582                     (tpif = ifunit_ref(rp->rp_portname)) == NULL) {
 1583                         error = EINVAL;
 1584                         break;
 1585                 }
 1586 
 1587                 NET_EPOCH_ENTER(et);
 1588                 if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
 1589                     lp->lp_softc != sc) {
 1590                         error = ENOENT;
 1591                         NET_EPOCH_EXIT(et);
 1592                         if_rele(tpif);
 1593                         break;
 1594                 }
 1595 
 1596                 lagg_port2req(lp, rp);
 1597                 NET_EPOCH_EXIT(et);
 1598                 if_rele(tpif);
 1599                 break;
 1600         case SIOCSLAGGPORT:
 1601                 error = priv_check(td, PRIV_NET_LAGG);
 1602                 if (error)
 1603                         break;
 1604                 if (rp->rp_portname[0] == '\0' ||
 1605                     (tpif = ifunit_ref(rp->rp_portname)) == NULL) {
 1606                         error = EINVAL;
 1607                         break;
 1608                 }
 1609 #ifdef INET6
 1610                 /*
 1611                  * A laggport interface should not have inet6 address
 1612                  * because two interfaces with a valid link-local
 1613                  * scope zone must not be merged in any form.  This
 1614                  * restriction is needed to prevent violation of
 1615                  * link-local scope zone.  Attempts to add a laggport
 1616                  * interface which has inet6 addresses triggers
 1617                  * removal of all inet6 addresses on the member
 1618                  * interface.
 1619                  */
 1620                 if (in6ifa_llaonifp(tpif)) {
 1621                         in6_ifdetach(tpif);
 1622                         if_printf(sc->sc_ifp,
 1623                             "IPv6 addresses on %s have been removed "
 1624                             "before adding it as a member to prevent "
 1625                             "IPv6 address scope violation.\n",
 1626                             tpif->if_xname);
 1627                 }
 1628 #endif
 1629                 oldmtu = ifp->if_mtu;
 1630                 LAGG_XLOCK(sc);
 1631                 error = lagg_port_create(sc, tpif);
 1632                 LAGG_XUNLOCK(sc);
 1633                 if_rele(tpif);
 1634 
 1635                 /*
 1636                  * LAGG MTU may change during addition of the first port.
 1637                  * If it did, do network layer specific procedure.
 1638                  */
 1639                 if (ifp->if_mtu != oldmtu) {
 1640 #ifdef INET6
 1641                         nd6_setmtu(ifp);
 1642 #endif
 1643                         rt_updatemtu(ifp);
 1644                 }
 1645 
 1646                 VLAN_CAPABILITIES(ifp);
 1647                 break;
 1648         case SIOCSLAGGDELPORT:
 1649                 error = priv_check(td, PRIV_NET_LAGG);
 1650                 if (error)
 1651                         break;
 1652                 if (rp->rp_portname[0] == '\0' ||
 1653                     (tpif = ifunit_ref(rp->rp_portname)) == NULL) {
 1654                         error = EINVAL;
 1655                         break;
 1656                 }
 1657 
 1658                 LAGG_XLOCK(sc);
 1659                 if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
 1660                     lp->lp_softc != sc) {
 1661                         error = ENOENT;
 1662                         LAGG_XUNLOCK(sc);
 1663                         if_rele(tpif);
 1664                         break;
 1665                 }
 1666 
 1667                 error = lagg_port_destroy(lp, 1);
 1668                 LAGG_XUNLOCK(sc);
 1669                 if_rele(tpif);
 1670                 VLAN_CAPABILITIES(ifp);
 1671                 break;
 1672         case SIOCSIFFLAGS:
 1673                 /* Set flags on ports too */
 1674                 LAGG_XLOCK(sc);
 1675                 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1676                         lagg_setflags(lp, 1);
 1677                 }
 1678 
 1679                 if (!(ifp->if_flags & IFF_UP) &&
 1680                     (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 1681                         /*
 1682                          * If interface is marked down and it is running,
 1683                          * then stop and disable it.
 1684                          */
 1685                         lagg_stop(sc);
 1686                         LAGG_XUNLOCK(sc);
 1687                 } else if ((ifp->if_flags & IFF_UP) &&
 1688                     !(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 1689                         /*
 1690                          * If interface is marked up and it is stopped, then
 1691                          * start it.
 1692                          */
 1693                         LAGG_XUNLOCK(sc);
 1694                         (*ifp->if_init)(sc);
 1695                 } else
 1696                         LAGG_XUNLOCK(sc);
 1697                 break;
 1698         case SIOCADDMULTI:
 1699         case SIOCDELMULTI:
 1700                 LAGG_XLOCK(sc);
 1701                 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1702                         lagg_clrmulti(lp);
 1703                         lagg_setmulti(lp);
 1704                 }
 1705                 LAGG_XUNLOCK(sc);
 1706                 error = 0;
 1707                 break;
 1708         case SIOCSIFMEDIA:
 1709         case SIOCGIFMEDIA:
 1710                 if (ifp->if_type == IFT_INFINIBAND)
 1711                         error = EINVAL;
 1712                 else
 1713                         error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
 1714                 break;
 1715 
 1716         case SIOCSIFCAP:
 1717         case SIOCSIFCAPNV:
 1718                 LAGG_XLOCK(sc);
 1719                 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1720                         if (lp->lp_ioctl != NULL)
 1721                                 (*lp->lp_ioctl)(lp->lp_ifp, cmd, data);
 1722                 }
 1723                 lagg_capabilities(sc);
 1724                 LAGG_XUNLOCK(sc);
 1725                 VLAN_CAPABILITIES(ifp);
 1726                 error = 0;
 1727                 break;
 1728 
 1729         case SIOCGIFCAPNV:
 1730                 error = 0;
 1731                 break;
 1732 
 1733         case SIOCSIFMTU:
 1734                 LAGG_XLOCK(sc);
 1735                 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1736                         if (lp->lp_ioctl != NULL)
 1737                                 error = (*lp->lp_ioctl)(lp->lp_ifp, cmd, data);
 1738                         else
 1739                                 error = EINVAL;
 1740                         if (error != 0) {
 1741                                 if_printf(ifp,
 1742                                     "failed to change MTU to %d on port %s, "
 1743                                     "reverting all ports to original MTU (%d)\n",
 1744                                     ifr->ifr_mtu, lp->lp_ifp->if_xname, ifp->if_mtu);
 1745                                 break;
 1746                         }
 1747                 }
 1748                 if (error == 0) {
 1749                         ifp->if_mtu = ifr->ifr_mtu;
 1750                 } else {
 1751                         /* set every port back to the original MTU */
 1752                         ifr->ifr_mtu = ifp->if_mtu;
 1753                         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 1754                                 if (lp->lp_ioctl != NULL)
 1755                                         (*lp->lp_ioctl)(lp->lp_ifp, cmd, data);
 1756                         }
 1757                 }
 1758                 lagg_capabilities(sc);
 1759                 LAGG_XUNLOCK(sc);
 1760                 VLAN_CAPABILITIES(ifp);
 1761                 break;
 1762 
 1763         default:
 1764                 error = ether_ioctl(ifp, cmd, data);
 1765                 break;
 1766         }
 1767         return (error);
 1768 }
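
/*
 * Illustrative sketch, not part of the driver: the SIOCSLAGGOPTS
 * handler above encodes "clear this option" as the arithmetic negation
 * of the LAGG_OPT_* value and validates exactly one option per call.
 * A userland caller might toggle LACP fast timeout roughly as below;
 * the lagg_reqopts layout and its ro_ifname member are assumed to
 * match <net/if_lagg.h>, and "s" is any datagram socket.
 */
#if 0
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/if_lagg.h>
#include <string.h>

static int
set_lacp_fast_timeout(int s, const char *ifname, int enable)
{
	struct lagg_reqopts ro;

	memset(&ro, 0, sizeof(ro));
	strlcpy(ro.ro_ifname, ifname, sizeof(ro.ro_ifname));
	/* A negative ro_opts requests clearing the option (see above). */
	ro.ro_opts = enable ? LAGG_OPT_LACP_FAST_TIMO :
	    -LAGG_OPT_LACP_FAST_TIMO;
	/* Fails with EINVAL unless the lagg protocol is LACP. */
	return (ioctl(s, SIOCSLAGGOPTS, &ro));
}
#endif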
 1769 
 1770 #if defined(KERN_TLS) || defined(RATELIMIT)
 1771 #ifdef RATELIMIT
 1772 static const struct if_snd_tag_sw lagg_snd_tag_ul_sw = {
 1773         .snd_tag_modify = lagg_snd_tag_modify,
 1774         .snd_tag_query = lagg_snd_tag_query,
 1775         .snd_tag_free = lagg_snd_tag_free,
 1776         .next_snd_tag = lagg_next_snd_tag,
 1777         .type = IF_SND_TAG_TYPE_UNLIMITED
 1778 };
 1779 
 1780 static const struct if_snd_tag_sw lagg_snd_tag_rl_sw = {
 1781         .snd_tag_modify = lagg_snd_tag_modify,
 1782         .snd_tag_query = lagg_snd_tag_query,
 1783         .snd_tag_free = lagg_snd_tag_free,
 1784         .next_snd_tag = lagg_next_snd_tag,
 1785         .type = IF_SND_TAG_TYPE_RATE_LIMIT
 1786 };
 1787 #endif
 1788 
 1789 #ifdef KERN_TLS
 1790 static const struct if_snd_tag_sw lagg_snd_tag_tls_sw = {
 1791         .snd_tag_modify = lagg_snd_tag_modify,
 1792         .snd_tag_query = lagg_snd_tag_query,
 1793         .snd_tag_free = lagg_snd_tag_free,
 1794         .next_snd_tag = lagg_next_snd_tag,
 1795         .type = IF_SND_TAG_TYPE_TLS
 1796 };
 1797 
 1798 #ifdef RATELIMIT
 1799 static const struct if_snd_tag_sw lagg_snd_tag_tls_rl_sw = {
 1800         .snd_tag_modify = lagg_snd_tag_modify,
 1801         .snd_tag_query = lagg_snd_tag_query,
 1802         .snd_tag_free = lagg_snd_tag_free,
 1803         .next_snd_tag = lagg_next_snd_tag,
 1804         .type = IF_SND_TAG_TYPE_TLS_RATE_LIMIT
 1805 };
 1806 #endif
 1807 #endif
 1808 
 1809 static inline struct lagg_snd_tag *
 1810 mst_to_lst(struct m_snd_tag *mst)
 1811 {
 1812 
 1813         return (__containerof(mst, struct lagg_snd_tag, com));
 1814 }
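
/*
 * __containerof() recovers a pointer to the enclosing structure from a
 * pointer to one of its members, which is how the generic m_snd_tag
 * embedded in struct lagg_snd_tag is mapped back to the lagg wrapper.
 * Sketch of the pattern with hypothetical types:
 *
 *	struct outer { int x; struct inner in; };
 *	struct inner *ip = &some_outer.in;
 *	struct outer *op = __containerof(ip, struct outer, in);
 */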
 1815 
 1816 /*
 1817  * Look up the port used by a specific flow.  This only works for lagg
 1818  * protocols with deterministic port mappings (e.g. not roundrobin).
 1819  * In addition, protocols which use a hash to map flows to ports must
 1820  * be configured to use the mbuf flowid rather than hashing packet
 1821  * contents.
 1822  */
 1823 static struct lagg_port *
 1824 lookup_snd_tag_port(struct ifnet *ifp, uint32_t flowid, uint32_t flowtype,
 1825     uint8_t numa_domain)
 1826 {
 1827         struct lagg_softc *sc;
 1828         struct lagg_port *lp;
 1829         struct lagg_lb *lb;
 1830         uint32_t hash, p;
 1831         int err;
 1832 
 1833         sc = ifp->if_softc;
 1834 
 1835         switch (sc->sc_proto) {
 1836         case LAGG_PROTO_FAILOVER:
 1837                 return (lagg_link_active(sc, sc->sc_primary));
 1838         case LAGG_PROTO_LOADBALANCE:
 1839                 if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 ||
 1840                     flowtype == M_HASHTYPE_NONE)
 1841                         return (NULL);
 1842                 p = flowid >> sc->flowid_shift;
 1843                 p %= sc->sc_count;
 1844                 lb = (struct lagg_lb *)sc->sc_psc;
 1845                 lp = lb->lb_ports[p];
 1846                 return (lagg_link_active(sc, lp));
 1847         case LAGG_PROTO_LACP:
 1848                 if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 ||
 1849                     flowtype == M_HASHTYPE_NONE)
 1850                         return (NULL);
 1851                 hash = flowid >> sc->flowid_shift;
 1852                 return (lacp_select_tx_port_by_hash(sc, hash, numa_domain, &err));
 1853         default:
 1854                 return (NULL);
 1855         }
 1856 }
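
/*
 * Worked example for the LAGG_PROTO_LOADBALANCE case above: with
 * flowid_shift = 16, sc_count = 4 and an mbuf flowid of 0x12345678,
 * the port index is (0x12345678 >> 16) % 4 = 0x1234 % 4 = 0, so the
 * flow maps to lb_ports[0].  A flow keeps hashing to the same port
 * for as long as the port set is unchanged, which is what makes a
 * per-flow send tag usable here at all.
 */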
 1857 
 1858 static int
 1859 lagg_snd_tag_alloc(struct ifnet *ifp,
 1860     union if_snd_tag_alloc_params *params,
 1861     struct m_snd_tag **ppmt)
 1862 {
 1863         struct epoch_tracker et;
 1864         const struct if_snd_tag_sw *sw;
 1865         struct lagg_snd_tag *lst;
 1866         struct lagg_port *lp;
 1867         struct ifnet *lp_ifp;
 1868         struct m_snd_tag *mst;
 1869         int error;
 1870 
 1871         switch (params->hdr.type) {
 1872 #ifdef RATELIMIT
 1873         case IF_SND_TAG_TYPE_UNLIMITED:
 1874                 sw = &lagg_snd_tag_ul_sw;
 1875                 break;
 1876         case IF_SND_TAG_TYPE_RATE_LIMIT:
 1877                 sw = &lagg_snd_tag_rl_sw;
 1878                 break;
 1879 #endif
 1880 #ifdef KERN_TLS
 1881         case IF_SND_TAG_TYPE_TLS:
 1882                 sw = &lagg_snd_tag_tls_sw;
 1883                 break;
 1884         case IF_SND_TAG_TYPE_TLS_RX:
 1885                 /* Return tag from port interface directly. */
 1886                 sw = NULL;
 1887                 break;
 1888 #ifdef RATELIMIT
 1889         case IF_SND_TAG_TYPE_TLS_RATE_LIMIT:
 1890                 sw = &lagg_snd_tag_tls_rl_sw;
 1891                 break;
 1892 #endif
 1893 #endif
 1894         default:
 1895                 return (EOPNOTSUPP);
 1896         }
 1897 
 1898         NET_EPOCH_ENTER(et);
 1899         lp = lookup_snd_tag_port(ifp, params->hdr.flowid,
 1900             params->hdr.flowtype, params->hdr.numa_domain);
 1901         if (lp == NULL) {
 1902                 NET_EPOCH_EXIT(et);
 1903                 return (EOPNOTSUPP);
 1904         }
 1905         if (lp->lp_ifp == NULL) {
 1906                 NET_EPOCH_EXIT(et);
 1907                 return (EOPNOTSUPP);
 1908         }
 1909         lp_ifp = lp->lp_ifp;
 1910         if_ref(lp_ifp);
 1911         NET_EPOCH_EXIT(et);
 1912 
 1913         if (sw != NULL) {
 1914                 lst = malloc(sizeof(*lst), M_LAGG, M_NOWAIT);
 1915                 if (lst == NULL) {
 1916                         if_rele(lp_ifp);
 1917                         return (ENOMEM);
 1918                 }
 1919         } else
 1920                 lst = NULL;
 1921 
 1922         error = m_snd_tag_alloc(lp_ifp, params, &mst);
 1923         if_rele(lp_ifp);
 1924         if (error) {
 1925                 free(lst, M_LAGG);
 1926                 return (error);
 1927         }
 1928 
 1929         if (sw != NULL) {
 1930                 m_snd_tag_init(&lst->com, ifp, sw);
 1931                 lst->tag = mst;
 1932 
 1933                 *ppmt = &lst->com;
 1934         } else
 1935                 *ppmt = mst;
 1936 
 1937         return (0);
 1938 }
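
/*
 * The tag returned above is two-level: lst->com belongs to the lagg
 * interface while lst->tag holds a reference on the tag allocated from
 * the selected port.  lagg_enqueue() swaps the inner tag into the mbuf
 * before it reaches the port's if_transmit, and lagg_snd_tag_free()
 * releases the inner reference when the wrapper is destroyed.
 */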
 1939 
 1940 static struct m_snd_tag *
 1941 lagg_next_snd_tag(struct m_snd_tag *mst)
 1942 {
 1943         struct lagg_snd_tag *lst;
 1944 
 1945         lst = mst_to_lst(mst);
 1946         return (lst->tag);
 1947 }
 1948 
 1949 static int
 1950 lagg_snd_tag_modify(struct m_snd_tag *mst,
 1951     union if_snd_tag_modify_params *params)
 1952 {
 1953         struct lagg_snd_tag *lst;
 1954 
 1955         lst = mst_to_lst(mst);
 1956         return (lst->tag->sw->snd_tag_modify(lst->tag, params));
 1957 }
 1958 
 1959 static int
 1960 lagg_snd_tag_query(struct m_snd_tag *mst,
 1961     union if_snd_tag_query_params *params)
 1962 {
 1963         struct lagg_snd_tag *lst;
 1964 
 1965         lst = mst_to_lst(mst);
 1966         return (lst->tag->sw->snd_tag_query(lst->tag, params));
 1967 }
 1968 
 1969 static void
 1970 lagg_snd_tag_free(struct m_snd_tag *mst)
 1971 {
 1972         struct lagg_snd_tag *lst;
 1973 
 1974         lst = mst_to_lst(mst);
 1975         m_snd_tag_rele(lst->tag);
 1976         free(lst, M_LAGG);
 1977 }
 1978 
 1979 static void
 1980 lagg_ratelimit_query(struct ifnet *ifp __unused, struct if_ratelimit_query_results *q)
 1981 {
 1982         /*
 1983          * For lagg, we have an indirect
 1984          * interface. The caller needs to
 1985          * get a ratelimit tag on the actual
 1986          * interface the flow will go on.
 1987          */
 1988         q->rate_table = NULL;
 1989         q->flags = RT_IS_INDIRECT;
 1990         q->max_flows = 0;
 1991         q->number_of_rates = 0;
 1992 }
 1993 #endif
 1994 
 1995 static int
 1996 lagg_setmulti(struct lagg_port *lp)
 1997 {
 1998         struct lagg_softc *sc = lp->lp_softc;
 1999         struct ifnet *ifp = lp->lp_ifp;
 2000         struct ifnet *scifp = sc->sc_ifp;
 2001         struct lagg_mc *mc;
 2002         struct ifmultiaddr *ifma;
 2003         int error;
 2004 
 2005         IF_ADDR_WLOCK(scifp);
 2006         CK_STAILQ_FOREACH(ifma, &scifp->if_multiaddrs, ifma_link) {
 2007                 if (ifma->ifma_addr->sa_family != AF_LINK)
 2008                         continue;
 2009                 mc = malloc(sizeof(struct lagg_mc), M_LAGG, M_NOWAIT);
 2010                 if (mc == NULL) {
 2011                         IF_ADDR_WUNLOCK(scifp);
 2012                         return (ENOMEM);
 2013                 }
 2014                 bcopy(ifma->ifma_addr, &mc->mc_addr,
 2015                     ifma->ifma_addr->sa_len);
 2016                 mc->mc_addr.sdl_index = ifp->if_index;
 2017                 mc->mc_ifma = NULL;
 2018                 SLIST_INSERT_HEAD(&lp->lp_mc_head, mc, mc_entries);
 2019         }
 2020         IF_ADDR_WUNLOCK(scifp);
 2021         SLIST_FOREACH (mc, &lp->lp_mc_head, mc_entries) {
 2022                 error = if_addmulti(ifp,
 2023                     (struct sockaddr *)&mc->mc_addr, &mc->mc_ifma);
 2024                 if (error)
 2025                         return (error);
 2026         }
 2027         return (0);
 2028 }
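
/*
 * lagg_setmulti() snapshots the lagg interface's AF_LINK multicast
 * list while holding the address lock (hence the M_NOWAIT allocation)
 * and then programs the addresses into the port with the lock dropped.
 * Together with lagg_clrmulti() below, this is how the SIOCADDMULTI
 * and SIOCDELMULTI cases of the ioctl handler above resynchronize
 * every port with the lagg's multicast membership.
 */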
 2029 
 2030 static int
 2031 lagg_clrmulti(struct lagg_port *lp)
 2032 {
 2033         struct lagg_mc *mc;
 2034 
 2035         LAGG_XLOCK_ASSERT(lp->lp_softc);
 2036         while ((mc = SLIST_FIRST(&lp->lp_mc_head)) != NULL) {
 2037                 SLIST_REMOVE(&lp->lp_mc_head, mc, lagg_mc, mc_entries);
 2038                 if (mc->mc_ifma && lp->lp_detaching == 0)
 2039                         if_delmulti_ifma(mc->mc_ifma);
 2040                 free(mc, M_LAGG);
 2041         }
 2042         return (0);
 2043 }
 2044 
 2045 static void
 2046 lagg_setcaps(struct lagg_port *lp, int cap, int cap2)
 2047 {
 2048         struct ifreq ifr;
 2049         struct siocsifcapnv_driver_data drv_ioctl_data;
 2050 
 2051         if (lp->lp_ifp->if_capenable == cap &&
 2052             lp->lp_ifp->if_capenable2 == cap2)
 2053                 return;
 2054         if (lp->lp_ioctl == NULL)
 2055                 return;
 2056         /* XXX */
 2057         if ((lp->lp_ifp->if_capabilities & IFCAP_NV) != 0) {
 2058                 drv_ioctl_data.reqcap = cap;
 2059                 drv_ioctl_data.reqcap2 = cap2;
 2060                 drv_ioctl_data.nvcap = NULL;
 2061                 (*lp->lp_ioctl)(lp->lp_ifp, SIOCSIFCAPNV,
 2062                     (caddr_t)&drv_ioctl_data);
 2063         } else {
 2064                 ifr.ifr_reqcap = cap;
 2065                 (*lp->lp_ioctl)(lp->lp_ifp, SIOCSIFCAP, (caddr_t)&ifr);
 2066         }
 2067 }
 2068 
 2069 /* Handle a ref counted flag that should be set on the lagg port as well */
 2070 static int
 2071 lagg_setflag(struct lagg_port *lp, int flag, int status,
 2072     int (*func)(struct ifnet *, int))
 2073 {
 2074         struct lagg_softc *sc = lp->lp_softc;
 2075         struct ifnet *scifp = sc->sc_ifp;
 2076         struct ifnet *ifp = lp->lp_ifp;
 2077         int error;
 2078 
 2079         LAGG_XLOCK_ASSERT(sc);
 2080 
 2081         status = status ? (scifp->if_flags & flag) : 0;
 2082         /* Now "status" contains the flag value or 0 */
 2083 
 2084         /*
 2085          * See if the recorded port status differs from what we want
 2086          * it to be.  If it does, flip it.  We record the port's
 2087          * status in lp_ifflags so that we won't clear a flag on the
 2088          * port that we haven't set.  In fact, we don't clear or set
 2089          * the port's flags directly, but get or release references
 2090          * to them.  That's why we can be sure that the recorded
 2091          * flags still agree with the actual port flags.
 2092          */
 2093         if (status != (lp->lp_ifflags & flag)) {
 2094                 error = (*func)(ifp, status);
 2095                 if (error)
 2096                         return (error);
 2097                 lp->lp_ifflags &= ~flag;
 2098                 lp->lp_ifflags |= status;
 2099         }
 2100         return (0);
 2101 }
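
/*
 * Example: when the lagg interface gains IFF_PROMISC, "status" above
 * evaluates to IFF_PROMISC while lp_ifflags still has the bit clear,
 * so (*func)() -- the per-flag handler from lagg_pflags[], ifpromisc()
 * in the IFF_PROMISC case -- acquires a reference on the port and the
 * bit is recorded in lp_ifflags.  A later call with status == 0
 * releases only the flags actually recorded there, so settings the
 * port owner configured independently are left alone.
 */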
 2102 
 2103 /*
 2104  * Handle IFF_* flags that require certain changes on the lagg port.
 2105  * If "status" is true, update the port's flags to match the lagg's;
 2106  * if "status" is false, forcibly clear the flags set on the port.
 2107  */
 2108 static int
 2109 lagg_setflags(struct lagg_port *lp, int status)
 2110 {
 2111         int error, i;
 2112 
 2113         for (i = 0; lagg_pflags[i].flag; i++) {
 2114                 error = lagg_setflag(lp, lagg_pflags[i].flag,
 2115                     status, lagg_pflags[i].func);
 2116                 if (error)
 2117                         return (error);
 2118         }
 2119         return (0);
 2120 }
 2121 
 2122 static int
 2123 lagg_transmit_ethernet(struct ifnet *ifp, struct mbuf *m)
 2124 {
 2125         struct epoch_tracker et;
 2126         struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 2127         int error;
 2128 
 2129 #if defined(KERN_TLS) || defined(RATELIMIT)
 2130         if (m->m_pkthdr.csum_flags & CSUM_SND_TAG)
 2131                 MPASS(m->m_pkthdr.snd_tag->ifp == ifp);
 2132 #endif
 2133         NET_EPOCH_ENTER(et);
 2134         /* We need a Tx algorithm and at least one port */
 2135         if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
 2136                 NET_EPOCH_EXIT(et);
 2137                 m_freem(m);
 2138                 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 2139                 return (ENXIO);
 2140         }
 2141 
 2142         ETHER_BPF_MTAP(ifp, m);
 2143 
 2144         error = lagg_proto_start(sc, m);
 2145         NET_EPOCH_EXIT(et);
 2146         return (error);
 2147 }
 2148 
 2149 static int
 2150 lagg_transmit_infiniband(struct ifnet *ifp, struct mbuf *m)
 2151 {
 2152         struct epoch_tracker et;
 2153         struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 2154         int error;
 2155 
 2156 #if defined(KERN_TLS) || defined(RATELIMIT)
 2157         if (m->m_pkthdr.csum_flags & CSUM_SND_TAG)
 2158                 MPASS(m->m_pkthdr.snd_tag->ifp == ifp);
 2159 #endif
 2160         NET_EPOCH_ENTER(et);
 2161         /* We need a Tx algorithm and at least one port */
 2162         if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
 2163                 NET_EPOCH_EXIT(et);
 2164                 m_freem(m);
 2165                 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 2166                 return (ENXIO);
 2167         }
 2168 
 2169         INFINIBAND_BPF_MTAP(ifp, m);
 2170 
 2171         error = lagg_proto_start(sc, m);
 2172         NET_EPOCH_EXIT(et);
 2173         return (error);
 2174 }
 2175 
 2176 /*
 2177  * The ifp->if_qflush entry point for lagg(4) is a no-op.
 2178  */
 2179 static void
 2180 lagg_qflush(struct ifnet *ifp __unused)
 2181 {
 2182 }
 2183 
 2184 static struct mbuf *
 2185 lagg_input_ethernet(struct ifnet *ifp, struct mbuf *m)
 2186 {
 2187         struct epoch_tracker et;
 2188         struct lagg_port *lp = ifp->if_lagg;
 2189         struct lagg_softc *sc = lp->lp_softc;
 2190         struct ifnet *scifp = sc->sc_ifp;
 2191 
 2192         NET_EPOCH_ENTER(et);
 2193         if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
 2194             lp->lp_detaching != 0 ||
 2195             sc->sc_proto == LAGG_PROTO_NONE) {
 2196                 NET_EPOCH_EXIT(et);
 2197                 m_freem(m);
 2198                 return (NULL);
 2199         }
 2200 
 2201         ETHER_BPF_MTAP(scifp, m);
 2202 
 2203         m = lagg_proto_input(sc, lp, m);
 2204         if (m != NULL && (scifp->if_flags & IFF_MONITOR) != 0) {
 2205                 m_freem(m);
 2206                 m = NULL;
 2207         }
 2208 
 2209 #ifdef DEV_NETMAP
 2210         if (m != NULL && scifp->if_capenable & IFCAP_NETMAP) {
 2211                 scifp->if_input(scifp, m);
 2212                 m = NULL;
 2213         }
 2214 #endif  /* DEV_NETMAP */
 2215 
 2216         NET_EPOCH_EXIT(et);
 2217         return (m);
 2218 }
 2219 
 2220 static struct mbuf *
 2221 lagg_input_infiniband(struct ifnet *ifp, struct mbuf *m)
 2222 {
 2223         struct epoch_tracker et;
 2224         struct lagg_port *lp = ifp->if_lagg;
 2225         struct lagg_softc *sc = lp->lp_softc;
 2226         struct ifnet *scifp = sc->sc_ifp;
 2227 
 2228         NET_EPOCH_ENTER(et);
 2229         if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
 2230             lp->lp_detaching != 0 ||
 2231             sc->sc_proto == LAGG_PROTO_NONE) {
 2232                 NET_EPOCH_EXIT(et);
 2233                 m_freem(m);
 2234                 return (NULL);
 2235         }
 2236 
 2237         INFINIBAND_BPF_MTAP(scifp, m);
 2238 
 2239         m = lagg_proto_input(sc, lp, m);
 2240         if (m != NULL && (scifp->if_flags & IFF_MONITOR) != 0) {
 2241                 m_freem(m);
 2242                 m = NULL;
 2243         }
 2244 
 2245         NET_EPOCH_EXIT(et);
 2246         return (m);
 2247 }
 2248 
 2249 static int
 2250 lagg_media_change(struct ifnet *ifp)
 2251 {
 2252         struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 2253 
 2254         if (sc->sc_ifflags & IFF_DEBUG)
 2255                 printf("%s\n", __func__);
 2256 
 2257         /* Ignore */
 2258         return (0);
 2259 }
 2260 
 2261 static void
 2262 lagg_media_status(struct ifnet *ifp, struct ifmediareq *imr)
 2263 {
 2264         struct epoch_tracker et;
 2265         struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 2266         struct lagg_port *lp;
 2267 
 2268         imr->ifm_status = IFM_AVALID;
 2269         imr->ifm_active = IFM_ETHER | IFM_AUTO;
 2270 
 2271         NET_EPOCH_ENTER(et);
 2272         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 2273                 if (LAGG_PORTACTIVE(lp))
 2274                         imr->ifm_status |= IFM_ACTIVE;
 2275         }
 2276         NET_EPOCH_EXIT(et);
 2277 }
 2278 
 2279 static void
 2280 lagg_linkstate(struct lagg_softc *sc)
 2281 {
 2282         struct epoch_tracker et;
 2283         struct lagg_port *lp;
 2284         int new_link = LINK_STATE_DOWN;
 2285         uint64_t speed;
 2286 
 2287         LAGG_XLOCK_ASSERT(sc);
 2288 
 2289         /* LACP handles link state itself */
 2290         if (sc->sc_proto == LAGG_PROTO_LACP)
 2291                 return;
 2292 
 2293         /* Our link is considered up if at least one of our ports is active */
 2294         NET_EPOCH_ENTER(et);
 2295         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 2296                 if (lp->lp_ifp->if_link_state == LINK_STATE_UP) {
 2297                         new_link = LINK_STATE_UP;
 2298                         break;
 2299                 }
 2300         }
 2301         NET_EPOCH_EXIT(et);
 2302         if_link_state_change(sc->sc_ifp, new_link);
 2303 
 2304         /* Update if_baudrate to reflect the max possible speed */
 2305         switch (sc->sc_proto) {
 2306                 case LAGG_PROTO_FAILOVER:
 2307                         sc->sc_ifp->if_baudrate = sc->sc_primary != NULL ?
 2308                             sc->sc_primary->lp_ifp->if_baudrate : 0;
 2309                         break;
 2310                 case LAGG_PROTO_ROUNDROBIN:
 2311                 case LAGG_PROTO_LOADBALANCE:
 2312                 case LAGG_PROTO_BROADCAST:
 2313                         speed = 0;
 2314                         NET_EPOCH_ENTER(et);
 2315                         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 2316                                 speed += lp->lp_ifp->if_baudrate;
 2317                         NET_EPOCH_EXIT(et);
 2318                         sc->sc_ifp->if_baudrate = speed;
 2319                         break;
 2320                 case LAGG_PROTO_LACP:
 2321                         /* LACP updates if_baudrate itself */
 2322                         break;
 2323         }
 2324 }
 2325 
 2326 static void
 2327 lagg_port_state(struct ifnet *ifp, int state)
 2328 {
 2329         struct lagg_port *lp = (struct lagg_port *)ifp->if_lagg;
 2330         struct lagg_softc *sc = NULL;
 2331 
 2332         if (lp != NULL)
 2333                 sc = lp->lp_softc;
 2334         if (sc == NULL)
 2335                 return;
 2336 
 2337         LAGG_XLOCK(sc);
 2338         lagg_linkstate(sc);
 2339         lagg_proto_linkstate(sc, lp);
 2340         LAGG_XUNLOCK(sc);
 2341 }
 2342 
 2343 struct lagg_port *
 2344 lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp)
 2345 {
 2346         struct lagg_port *lp_next, *rval = NULL;
 2347 
 2348         /*
 2349          * Search a port which reports an active link state.
 2350          */
 2351 
 2352 #ifdef INVARIANTS
 2353         /*
 2354          * This is called either in the network epoch
 2355          * or with LAGG_XLOCK(sc) held.
 2356          */
 2357         if (!in_epoch(net_epoch_preempt))
 2358                 LAGG_XLOCK_ASSERT(sc);
 2359 #endif
 2360 
 2361         if (lp == NULL)
 2362                 goto search;
 2363         if (LAGG_PORTACTIVE(lp)) {
 2364                 rval = lp;
 2365                 goto found;
 2366         }
 2367         if ((lp_next = CK_SLIST_NEXT(lp, lp_entries)) != NULL &&
 2368             LAGG_PORTACTIVE(lp_next)) {
 2369                 rval = lp_next;
 2370                 goto found;
 2371         }
 2372 
 2373 search:
 2374         CK_SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
 2375                 if (LAGG_PORTACTIVE(lp_next)) {
 2376                         return (lp_next);
 2377                 }
 2378         }
 2379 found:
 2380         return (rval);
 2381 }
 2382 
 2383 int
 2384 lagg_enqueue(struct ifnet *ifp, struct mbuf *m)
 2385 {
 2386 
 2387 #if defined(KERN_TLS) || defined(RATELIMIT)
 2388         if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) {
 2389                 struct lagg_snd_tag *lst;
 2390                 struct m_snd_tag *mst;
 2391 
 2392                 mst = m->m_pkthdr.snd_tag;
 2393                 lst = mst_to_lst(mst);
 2394                 if (lst->tag->ifp != ifp) {
 2395                         m_freem(m);
 2396                         return (EAGAIN);
 2397                 }
 2398                 m->m_pkthdr.snd_tag = m_snd_tag_ref(lst->tag);
 2399                 m_snd_tag_rele(mst);
 2400         }
 2401 #endif
 2402         return (ifp->if_transmit)(ifp, m);
 2403 }
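
/*
 * The EAGAIN case above catches a stale send tag: if port selection
 * changed since the tag was allocated (e.g. after a link flap), the
 * inner tag still points at the old port, so the packet is dropped
 * and the caller is expected to allocate a fresh tag.
 */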
 2404 
 2405 /*
 2406  * Simple round robin aggregation
 2407  */
 2408 static void
 2409 lagg_rr_attach(struct lagg_softc *sc)
 2410 {
 2411         sc->sc_seq = 0;
 2412         sc->sc_stride = 1;
 2413 }
 2414 
 2415 static int
 2416 lagg_rr_start(struct lagg_softc *sc, struct mbuf *m)
 2417 {
 2418         struct lagg_port *lp;
 2419         uint32_t p;
 2420 
 2421         p = atomic_fetchadd_32(&sc->sc_seq, 1);
 2422         p /= sc->sc_stride;
 2423         p %= sc->sc_count;
 2424         lp = CK_SLIST_FIRST(&sc->sc_ports);
 2425 
 2426         while (p--)
 2427                 lp = CK_SLIST_NEXT(lp, lp_entries);
 2428 
 2429         /*
 2430          * Check the port's link state. This will return the next active
 2431          * port if the link is down or the port is NULL.
 2432          */
 2433         if ((lp = lagg_link_active(sc, lp)) == NULL) {
 2434                 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
 2435                 m_freem(m);
 2436                 return (ENETDOWN);
 2437         }
 2438 
 2439         /* Send mbuf */
 2440         return (lagg_enqueue(lp->lp_ifp, m));
 2441 }
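
/*
 * Worked example of the arithmetic above: with sc_stride = 2 and
 * three ports, successive sc_seq values select ports as
 * 0,1 -> port 0; 2,3 -> port 1; 4,5 -> port 2; 6,7 -> port 0; ...
 * i.e. (seq / stride) % count.  sc_stride defaults to 1 (strict
 * per-packet round-robin); the LAGG_OPT_RR_LIMIT handling in the
 * ioctl code above raises it so short bursts of a flow stay on one
 * port.
 */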
 2442 
 2443 static struct mbuf *
 2444 lagg_rr_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
 2445 {
 2446         struct ifnet *ifp = sc->sc_ifp;
 2447 
 2448         /* Just pass in the packet to our lagg device */
 2449         m->m_pkthdr.rcvif = ifp;
 2450 
 2451         return (m);
 2452 }
 2453 
 2454 /*
 2455  * Broadcast mode
 2456  */
 2457 static int
 2458 lagg_bcast_start(struct lagg_softc *sc, struct mbuf *m)
 2459 {
 2460         int errors = 0;
 2461         int ret;
 2462         struct lagg_port *lp, *last = NULL;
 2463         struct mbuf *m0;
 2464 
 2465         NET_EPOCH_ASSERT();
 2466         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 2467                 if (!LAGG_PORTACTIVE(lp))
 2468                         continue;
 2469 
 2470                 if (last != NULL) {
 2471                         m0 = m_copym(m, 0, M_COPYALL, M_NOWAIT);
 2472                         if (m0 == NULL) {
 2473                                 ret = ENOBUFS;
 2474                                 errors++;
 2475                                 break;
 2476                         }
 2477                         lagg_enqueue(last->lp_ifp, m0);
 2478                 }
 2479                 last = lp;
 2480         }
 2481 
 2482         if (last == NULL) {
 2483                 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
 2484                 m_freem(m);
 2485                 return (ENOENT);
 2486         }
 2487         if ((last = lagg_link_active(sc, last)) == NULL) {
 2488                 errors++;
 2489                 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, errors);
 2490                 m_freem(m);
 2491                 return (ENETDOWN);
 2492         }
 2493 
 2494         ret = lagg_enqueue(last->lp_ifp, m);
 2495         if (errors != 0)
 2496                 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, errors);
 2497 
 2498         return (ret);
 2499 }
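
/*
 * Note the "last" pointer above: every active port except the final
 * one transmits an m_copym() duplicate, while the original mbuf is
 * sent on the last active port, saving one copy per broadcast.
 */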
 2500 
 2501 static struct mbuf*
 2502 lagg_bcast_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
 2503 {
 2504         struct ifnet *ifp = sc->sc_ifp;
 2505 
 2506         /* Just pass in the packet to our lagg device */
 2507         m->m_pkthdr.rcvif = ifp;
 2508         return (m);
 2509 }
 2510 
 2511 /*
 2512  * Active failover
 2513  */
 2514 static int
 2515 lagg_fail_start(struct lagg_softc *sc, struct mbuf *m)
 2516 {
 2517         struct lagg_port *lp;
 2518 
 2519         /* Use the master port if active or the next available port */
 2520         if ((lp = lagg_link_active(sc, sc->sc_primary)) == NULL) {
 2521                 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
 2522                 m_freem(m);
 2523                 return (ENETDOWN);
 2524         }
 2525 
 2526         /* Send mbuf */
 2527         return (lagg_enqueue(lp->lp_ifp, m));
 2528 }
 2529 
 2530 static struct mbuf *
 2531 lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
 2532 {
 2533         struct ifnet *ifp = sc->sc_ifp;
 2534         struct lagg_port *tmp_tp;
 2535 
 2536         if (lp == sc->sc_primary || V_lagg_failover_rx_all) {
 2537                 m->m_pkthdr.rcvif = ifp;
 2538                 return (m);
 2539         }
 2540 
 2541         if (!LAGG_PORTACTIVE(sc->sc_primary)) {
 2542                 tmp_tp = lagg_link_active(sc, sc->sc_primary);
 2543                 /*
 2544                  * If tmp_tp is null, we've received a packet when all
 2545                  * our links are down.  Weird, but process it anyway.
 2546                  */
 2547                 if ((tmp_tp == NULL || tmp_tp == lp)) {
 2548                         m->m_pkthdr.rcvif = ifp;
 2549                         return (m);
 2550                 }
 2551         }
 2552 
 2553         m_freem(m);
 2554         return (NULL);
 2555 }
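
/*
 * Receive policy for failover: frames are normally accepted only on
 * the primary port.  The failover_rx_all knob (V_lagg_failover_rx_all
 * above) accepts them on any port, and frames arriving on the acting
 * backup are accepted while the primary is down, so reception
 * continues across a failover transition.
 */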
 2556 
 2557 /*
 2558  * Loadbalancing
 2559  */
 2560 static void
 2561 lagg_lb_attach(struct lagg_softc *sc)
 2562 {
 2563         struct lagg_port *lp;
 2564         struct lagg_lb *lb;
 2565 
 2566         LAGG_XLOCK_ASSERT(sc);
 2567         lb = malloc(sizeof(struct lagg_lb), M_LAGG, M_WAITOK | M_ZERO);
 2568         lb->lb_key = m_ether_tcpip_hash_init();
 2569         sc->sc_psc = lb;
 2570 
 2571         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 2572                 lagg_lb_port_create(lp);
 2573 }
 2574 
 2575 static void
 2576 lagg_lb_detach(struct lagg_softc *sc)
 2577 {
 2578         struct lagg_lb *lb;
 2579 
 2580         lb = (struct lagg_lb *)sc->sc_psc;
 2581         if (lb != NULL)
 2582                 free(lb, M_LAGG);
 2583 }
 2584 
 2585 static int
 2586 lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp)
 2587 {
 2588         struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
 2589         struct lagg_port *lp_next;
 2590         int i = 0, rv;
 2591 
 2592         rv = 0;
 2593         bzero(&lb->lb_ports, sizeof(lb->lb_ports));
 2594         LAGG_XLOCK_ASSERT(sc);
 2595         CK_SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
 2596                 if (lp_next == lp)
 2597                         continue;
 2598                 if (i >= LAGG_MAX_PORTS) {
 2599                         rv = EINVAL;
 2600                         break;
 2601                 }
 2602                 if (sc->sc_ifflags & IFF_DEBUG)
 2603                         printf("%s: port %s at index %d\n",
 2604                             sc->sc_ifname, lp_next->lp_ifp->if_xname, i);
 2605                 lb->lb_ports[i++] = lp_next;
 2606         }
 2607 
 2608         return (rv);
 2609 }
 2610 
 2611 static int
 2612 lagg_lb_port_create(struct lagg_port *lp)
 2613 {
 2614         struct lagg_softc *sc = lp->lp_softc;
 2615         return (lagg_lb_porttable(sc, NULL));
 2616 }
 2617 
 2618 static void
 2619 lagg_lb_port_destroy(struct lagg_port *lp)
 2620 {
 2621         struct lagg_softc *sc = lp->lp_softc;
 2622         lagg_lb_porttable(sc, lp);
 2623 }
 2624 
 2625 static int
 2626 lagg_lb_start(struct lagg_softc *sc, struct mbuf *m)
 2627 {
 2628         struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
 2629         struct lagg_port *lp = NULL;
 2630         uint32_t p = 0;
 2631 
 2632         if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) &&
 2633             M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
 2634                 p = m->m_pkthdr.flowid >> sc->flowid_shift;
 2635         else
 2636                 p = m_ether_tcpip_hash(sc->sc_flags, m, lb->lb_key);
 2637         p %= sc->sc_count;
 2638         lp = lb->lb_ports[p];
 2639 
 2640         /*
 2641          * Check the port's link state. This will return the next active
 2642          * port if the link is down or the port is NULL.
 2643          */
 2644         if ((lp = lagg_link_active(sc, lp)) == NULL) {
 2645                 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
 2646                 m_freem(m);
 2647                 return (ENETDOWN);
 2648         }
 2649 
 2650         /* Send mbuf */
 2651         return (lagg_enqueue(lp->lp_ifp, m));
 2652 }
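
/*
 * Port selection above prefers the mbuf flowid (typically the NIC's
 * RSS hash) when LAGG_OPT_USE_FLOWID is set, and only falls back to
 * hashing packet contents in software otherwise.  The L2/L3/L4 hash
 * layers configured through SIOCSLAGGHASH (sc_flags) control which
 * headers m_ether_tcpip_hash() folds into the hash.
 */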
 2653 
 2654 static struct mbuf *
 2655 lagg_lb_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
 2656 {
 2657         struct ifnet *ifp = sc->sc_ifp;
 2658 
 2659         /* Just pass in the packet to our lagg device */
 2660         m->m_pkthdr.rcvif = ifp;
 2661 
 2662         return (m);
 2663 }
 2664 
 2665 /*
 2666  * 802.3ad LACP
 2667  */
 2668 static void
 2669 lagg_lacp_attach(struct lagg_softc *sc)
 2670 {
 2671         struct lagg_port *lp;
 2672 
 2673         lacp_attach(sc);
 2674         LAGG_XLOCK_ASSERT(sc);
 2675         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 2676                 lacp_port_create(lp);
 2677 }
 2678 
 2679 static void
 2680 lagg_lacp_detach(struct lagg_softc *sc)
 2681 {
 2682         struct lagg_port *lp;
 2683         void *psc;
 2684 
 2685         LAGG_XLOCK_ASSERT(sc);
 2686         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 2687                 lacp_port_destroy(lp);
 2688 
 2689         psc = sc->sc_psc;
 2690         sc->sc_psc = NULL;
 2691         lacp_detach(psc);
 2692 }
 2693 
 2694 static void
 2695 lagg_lacp_lladdr(struct lagg_softc *sc)
 2696 {
 2697         struct lagg_port *lp;
 2698 
 2699         LAGG_SXLOCK_ASSERT(sc);
 2700 
 2701         /* purge all the lacp ports */
 2702         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 2703                 lacp_port_destroy(lp);
 2704 
 2705         /* add them back in */
 2706         CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 2707                 lacp_port_create(lp);
 2708 }
 2709 
 2710 static int
 2711 lagg_lacp_start(struct lagg_softc *sc, struct mbuf *m)
 2712 {
 2713         struct lagg_port *lp;
 2714         int err;
 2715 
 2716         lp = lacp_select_tx_port(sc, m, &err);
 2717         if (lp == NULL) {
 2718                 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
 2719                 m_freem(m);
 2720                 return (err);
 2721         }
 2722 
 2723         /* Send mbuf */
 2724         return (lagg_enqueue(lp->lp_ifp, m));
 2725 }
 2726 
 2727 static struct mbuf *
 2728 lagg_lacp_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
 2729 {
 2730         struct ifnet *ifp = sc->sc_ifp;
 2731         struct ether_header *eh;
 2732         u_short etype;
 2733 
 2734         eh = mtod(m, struct ether_header *);
 2735         etype = ntohs(eh->ether_type);
 2736 
 2737         /* Tap off LACP control messages */
 2738         if ((m->m_flags & M_VLANTAG) == 0 && etype == ETHERTYPE_SLOW) {
 2739                 m = lacp_input(lp, m);
 2740                 if (m == NULL)
 2741                         return (NULL);
 2742         }
 2743 
 2744         /*
 2745          * If the port is not collecting or not in the active aggregator then
 2746          * free and return.
 2747          */
 2748         if (lacp_iscollecting(lp) == 0 || lacp_isactive(lp) == 0) {
 2749                 m_freem(m);
 2750                 return (NULL);
 2751         }
 2752 
 2753         m->m_pkthdr.rcvif = ifp;
 2754         return (m);
 2755 }
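
/*
 * ETHERTYPE_SLOW (0x8809) carries the IEEE 802.3ad slow protocols,
 * LACP and marker PDUs among them; those frames are consumed by
 * lacp_input() and never reach the stack.  All other traffic is
 * delivered only if the receiving port is collecting and belongs to
 * the active aggregator, which avoids duplicates while an aggregator
 * change is in progress.
 */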
