The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/net/if.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 1980, 1986, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  * 3. Neither the name of the University nor the names of its contributors
   16  *    may be used to endorse or promote products derived from this software
   17  *    without specific prior written permission.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   29  * SUCH DAMAGE.
   30  *
   31  *      @(#)if.c        8.5 (Berkeley) 1/9/95
   32  * $FreeBSD$
   33  */
   34 
   35 #include "opt_bpf.h"
   36 #include "opt_inet6.h"
   37 #include "opt_inet.h"
   38 
   39 #include <sys/param.h>
   40 #include <sys/types.h>
   41 #include <sys/conf.h>
   42 #include <sys/malloc.h>
   43 #include <sys/sbuf.h>
   44 #include <sys/bus.h>
   45 #include <sys/epoch.h>
   46 #include <sys/mbuf.h>
   47 #include <sys/systm.h>
   48 #include <sys/priv.h>
   49 #include <sys/proc.h>
   50 #include <sys/socket.h>
   51 #include <sys/socketvar.h>
   52 #include <sys/protosw.h>
   53 #include <sys/kernel.h>
   54 #include <sys/lock.h>
   55 #include <sys/refcount.h>
   56 #include <sys/module.h>
   57 #include <sys/rwlock.h>
   58 #include <sys/sockio.h>
   59 #include <sys/syslog.h>
   60 #include <sys/sysctl.h>
   61 #include <sys/sysent.h>
   62 #include <sys/taskqueue.h>
   63 #include <sys/domain.h>
   64 #include <sys/jail.h>
   65 #include <sys/priv.h>
   66 #include <sys/sched.h>
   67 #include <sys/smp.h>
   68 
   69 #include <machine/stdarg.h>
   70 #include <vm/uma.h>
   71 
   72 #include <net/bpf.h>
   73 #include <net/ethernet.h>
   74 #include <net/if.h>
   75 #include <net/if_arp.h>
   76 #include <net/if_clone.h>
   77 #include <net/if_dl.h>
   78 #include <net/if_types.h>
   79 #include <net/if_var.h>
   80 #include <net/if_media.h>
   81 #include <net/if_vlan_var.h>
   82 #include <net/radix.h>
   83 #include <net/route.h>
   84 #include <net/vnet.h>
   85 
   86 #if defined(INET) || defined(INET6)
   87 #include <net/ethernet.h>
   88 #include <netinet/in.h>
   89 #include <netinet/in_var.h>
   90 #include <netinet/ip.h>
   91 #include <netinet/ip_carp.h>
   92 #ifdef INET
   93 #include <netinet/if_ether.h>
   94 #include <netinet/netdump/netdump.h>
   95 #endif /* INET */
   96 #ifdef INET6
   97 #include <netinet6/in6_var.h>
   98 #include <netinet6/in6_ifattach.h>
   99 #endif /* INET6 */
  100 #endif /* INET || INET6 */
  101 
  102 #include <security/mac/mac_framework.h>
  103 
  104 /*
  105  * Consumers of struct ifreq such as tcpdump assume no pad between ifr_name
  106  * and ifr_ifru when it is used in SIOCGIFCONF.
  107  */
  108 _Static_assert(sizeof(((struct ifreq *)0)->ifr_name) ==
  109     offsetof(struct ifreq, ifr_ifru), "gap between ifr_name and ifr_ifru");
  110 
  111 __read_mostly epoch_t net_epoch_preempt;
  112 __read_mostly epoch_t net_epoch;
  113 #ifdef COMPAT_FREEBSD32
  114 #include <sys/mount.h>
  115 #include <compat/freebsd32/freebsd32.h>
  116 
  117 struct ifreq_buffer32 {
  118         uint32_t        length;         /* (size_t) */
  119         uint32_t        buffer;         /* (void *) */
  120 };
  121 
  122 /*
  123  * Interface request structure used for socket
  124  * ioctl's.  All interface ioctl's must have parameter
  125  * definitions which begin with ifr_name.  The
  126  * remainder may be interface specific.
  127  */
  128 struct ifreq32 {
  129         char    ifr_name[IFNAMSIZ];             /* if name, e.g. "en0" */
  130         union {
  131                 struct sockaddr ifru_addr;
  132                 struct sockaddr ifru_dstaddr;
  133                 struct sockaddr ifru_broadaddr;
  134                 struct ifreq_buffer32 ifru_buffer;
  135                 short           ifru_flags[2];
  136                 short           ifru_index;
  137                 int             ifru_jid;
  138                 int             ifru_metric;
  139                 int             ifru_mtu;
  140                 int             ifru_phys;
  141                 int             ifru_media;
  142                 uint32_t        ifru_data;
  143                 int             ifru_cap[2];
  144                 u_int           ifru_fib;
  145                 u_char          ifru_vlan_pcp;
  146         } ifr_ifru;
  147 };
  148 CTASSERT(sizeof(struct ifreq) == sizeof(struct ifreq32));
  149 CTASSERT(__offsetof(struct ifreq, ifr_ifru) ==
  150     __offsetof(struct ifreq32, ifr_ifru));
  151 
  152 struct ifgroupreq32 {
  153         char    ifgr_name[IFNAMSIZ];
  154         u_int   ifgr_len;
  155         union {
  156                 char            ifgru_group[IFNAMSIZ];
  157                 uint32_t        ifgru_groups;
  158         } ifgr_ifgru;
  159 };
  160 
  161 struct ifmediareq32 {
  162         char            ifm_name[IFNAMSIZ];
  163         int             ifm_current;
  164         int             ifm_mask;
  165         int             ifm_status;
  166         int             ifm_active;
  167         int             ifm_count;
  168         uint32_t        ifm_ulist;      /* (int *) */
  169 };
  170 #define SIOCGIFMEDIA32  _IOC_NEWTYPE(SIOCGIFMEDIA, struct ifmediareq32)
  171 #define SIOCGIFXMEDIA32 _IOC_NEWTYPE(SIOCGIFXMEDIA, struct ifmediareq32)
  172 
  173 #define _CASE_IOC_IFGROUPREQ_32(cmd)                            \
  174     _IOC_NEWTYPE((cmd), struct ifgroupreq32): case
  175 #else /* !COMPAT_FREEBSD32 */
  176 #define _CASE_IOC_IFGROUPREQ_32(cmd)
  177 #endif /* !COMPAT_FREEBSD32 */
  178 
  179 #define CASE_IOC_IFGROUPREQ(cmd)        \
  180     _CASE_IOC_IFGROUPREQ_32(cmd)        \
  181     (cmd)
  182 
  183 union ifreq_union {
  184         struct ifreq    ifr;
  185 #ifdef COMPAT_FREEBSD32
  186         struct ifreq32  ifr32;
  187 #endif
  188 };
  189 
  190 union ifgroupreq_union {
  191         struct ifgroupreq ifgr;
  192 #ifdef COMPAT_FREEBSD32
  193         struct ifgroupreq32 ifgr32;
  194 #endif
  195 };
  196 
  197 SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
  198 SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
  199 
  200 SYSCTL_INT(_net_link, OID_AUTO, ifqmaxlen, CTLFLAG_RDTUN,
  201     &ifqmaxlen, 0, "max send queue size");
  202 
  203 /* Log link state change events */
  204 static int log_link_state_change = 1;
  205 
  206 SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW,
  207         &log_link_state_change, 0,
  208         "log interface link state change events");
  209 
  210 /* Log promiscuous mode change events */
  211 static int log_promisc_mode_change = 1;
  212 
  213 SYSCTL_INT(_net_link, OID_AUTO, log_promisc_mode_change, CTLFLAG_RDTUN,
  214         &log_promisc_mode_change, 1,
  215         "log promiscuous mode change events");
  216 
  217 /* Interface description */
  218 static unsigned int ifdescr_maxlen = 1024;
  219 SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW,
  220         &ifdescr_maxlen, 0,
  221         "administrative maximum length for interface description");
  222 
  223 static MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions");
  224 
  225 /* global sx for non-critical path ifdescr */
  226 static struct sx ifdescr_sx;
  227 SX_SYSINIT(ifdescr_sx, &ifdescr_sx, "ifnet descr");
  228 
  229 void    (*ng_ether_link_state_p)(struct ifnet *ifp, int state);
  230 void    (*lagg_linkstate_p)(struct ifnet *ifp, int state);
  231 /* These are external hooks for CARP. */
  232 void    (*carp_linkstate_p)(struct ifnet *ifp);
  233 void    (*carp_demote_adj_p)(int, char *);
  234 int     (*carp_master_p)(struct ifaddr *);
  235 #if defined(INET) || defined(INET6)
  236 int     (*carp_forus_p)(struct ifnet *ifp, u_char *dhost);
  237 int     (*carp_output_p)(struct ifnet *ifp, struct mbuf *m,
  238     const struct sockaddr *sa);
  239 int     (*carp_ioctl_p)(struct ifreq *, u_long, struct thread *);   
  240 int     (*carp_attach_p)(struct ifaddr *, int);
  241 void    (*carp_detach_p)(struct ifaddr *, bool);
  242 #endif
  243 #ifdef INET
  244 int     (*carp_iamatch_p)(struct ifaddr *, uint8_t **);
  245 #endif
  246 #ifdef INET6
  247 struct ifaddr *(*carp_iamatch6_p)(struct ifnet *ifp, struct in6_addr *taddr6);
  248 caddr_t (*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m,
  249     const struct in6_addr *taddr);
  250 #endif
  251 
  252 struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL;
  253 
  254 /*
  255  * XXX: Style; these should be sorted alphabetically, and unprototyped
  256  * static functions should be prototyped. Currently they are sorted by
  257  * declaration order.
  258  */
  259 static void     if_attachdomain(void *);
  260 static void     if_attachdomain1(struct ifnet *);
  261 static int      ifconf(u_long, caddr_t);
  262 static void     *if_grow(void);
  263 static void     if_input_default(struct ifnet *, struct mbuf *);
  264 static int      if_requestencap_default(struct ifnet *, struct if_encap_req *);
  265 static void     if_route(struct ifnet *, int flag, int fam);
  266 static int      if_setflag(struct ifnet *, int, int, int *, int);
  267 static int      if_transmit(struct ifnet *ifp, struct mbuf *m);
  268 static void     if_unroute(struct ifnet *, int flag, int fam);
  269 static void     link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
  270 static int      if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int);
  271 static void     do_link_state_change(void *, int);
  272 static int      if_getgroup(struct ifgroupreq *, struct ifnet *);
  273 static int      if_getgroupmembers(struct ifgroupreq *);
  274 static void     if_delgroups(struct ifnet *);
  275 static void     if_attach_internal(struct ifnet *, int, struct if_clone *);
  276 static int      if_detach_internal(struct ifnet *, int, struct if_clone **);
  277 static void     if_link_ifnet(struct ifnet *);
  278 static bool     if_unlink_ifnet(struct ifnet *, bool);
  279 #ifdef VIMAGE
  280 static void     if_vmove(struct ifnet *, struct vnet *);
  281 #endif
  282 
  283 #ifdef INET6
  284 /*
  285  * XXX: declared here to avoid including many inet6-related files;
  286  * should this be more generalized?
  287  */
  288 extern void     nd6_setmtu(struct ifnet *);
  289 #endif
  290 
  291 /* ipsec helper hooks */
  292 VNET_DEFINE(struct hhook_head *, ipsec_hhh_in[HHOOK_IPSEC_COUNT]);
  293 VNET_DEFINE(struct hhook_head *, ipsec_hhh_out[HHOOK_IPSEC_COUNT]);
  294 
  295 VNET_DEFINE(int, if_index);
  296 int     ifqmaxlen = IFQ_MAXLEN;
  297 VNET_DEFINE(struct ifnethead, ifnet);   /* depend on static init XXX */
  298 VNET_DEFINE(struct ifgrouphead, ifg_head);
  299 
  300 VNET_DEFINE_STATIC(int, if_indexlim) = 8;
  301 
  302 /* Table of ifnet by index. */
  303 VNET_DEFINE(struct ifnet **, ifindex_table);
  304 
  305 #define V_if_indexlim           VNET(if_indexlim)
  306 #define V_ifindex_table         VNET(ifindex_table)
  307 
  308 /*
  309  * The global network interface list (V_ifnet) and related state (such as
  310  * if_index, if_indexlim, and ifindex_table) are protected by an sxlock.
  311  * This may be acquired to stabilise the list, or we may rely on NET_EPOCH.
  312  */
  313 struct rwlock ifnet_rwlock;
  314 RW_SYSINIT_FLAGS(ifnet_rw, &ifnet_rwlock, "ifnet_rw", RW_RECURSE);
  315 struct sx ifnet_sxlock;
  316 SX_SYSINIT_FLAGS(ifnet_sx, &ifnet_sxlock, "ifnet_sx", SX_RECURSE);
  317 
  318 struct sx ifnet_detach_sxlock;
  319 SX_SYSINIT_FLAGS(ifnet_detach, &ifnet_detach_sxlock, "ifnet_detach_sx",
  320     SX_RECURSE);
  321 
  322 /*
  323  * The allocation of network interfaces is a rather non-atomic affair; we
  324  * need to select an index before we are ready to expose the interface for
  325  * use, so will use this pointer value to indicate reservation.
  326  */
  327 #define IFNET_HOLD      (void *)(uintptr_t)(-1)
  328 
  329 static  if_com_alloc_t *if_com_alloc[256];
  330 static  if_com_free_t *if_com_free[256];
  331 
  332 static MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals");
  333 MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
  334 MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
  335 
  336 struct ifnet *
  337 ifnet_byindex_locked(u_short idx)
  338 {
  339 
  340         if (idx > V_if_index)
  341                 return (NULL);
  342         if (V_ifindex_table[idx] == IFNET_HOLD)
  343                 return (NULL);
  344         return (V_ifindex_table[idx]);
  345 }
  346 
  347 struct ifnet *
  348 ifnet_byindex(u_short idx)
  349 {
  350         struct ifnet *ifp;
  351 
  352         ifp = ifnet_byindex_locked(idx);
  353         return (ifp);
  354 }
  355 
  356 struct ifnet *
  357 ifnet_byindex_ref(u_short idx)
  358 {
  359         struct ifnet *ifp;
  360 
  361         IFNET_RLOCK_NOSLEEP();
  362         ifp = ifnet_byindex_locked(idx);
  363         if (ifp == NULL || (ifp->if_flags & IFF_DYING)) {
  364                 IFNET_RUNLOCK_NOSLEEP();
  365                 return (NULL);
  366         }
  367         if_ref(ifp);
  368         IFNET_RUNLOCK_NOSLEEP();
  369         return (ifp);
  370 }
  371 
  372 /*
  373  * Allocate an ifindex array entry; return 0 on success or an error on
  374  * failure.
  375  */
  376 static u_short
  377 ifindex_alloc(void **old)
  378 {
  379         u_short idx;
  380 
  381         IFNET_WLOCK_ASSERT();
  382         /*
  383          * Try to find an empty slot below V_if_index.  If we fail, take the
  384          * next slot.
  385          */
  386         for (idx = 1; idx <= V_if_index; idx++) {
  387                 if (V_ifindex_table[idx] == NULL)
  388                         break;
  389         }
  390 
  391         /* Catch if_index overflow. */
  392         if (idx >= V_if_indexlim) {
  393                 *old = if_grow();
  394                 return (USHRT_MAX);
  395         }
  396         if (idx > V_if_index)
  397                 V_if_index = idx;
  398         return (idx);
  399 }
  400 
  401 static void
  402 ifindex_free_locked(u_short idx)
  403 {
  404 
  405         IFNET_WLOCK_ASSERT();
  406 
  407         V_ifindex_table[idx] = NULL;
  408         while (V_if_index > 0 &&
  409             V_ifindex_table[V_if_index] == NULL)
  410                 V_if_index--;
  411 }
  412 
  413 static void
  414 ifindex_free(u_short idx)
  415 {
  416 
  417         IFNET_WLOCK();
  418         ifindex_free_locked(idx);
  419         IFNET_WUNLOCK();
  420 }
  421 
  422 static void
  423 ifnet_setbyindex(u_short idx, struct ifnet *ifp)
  424 {
  425 
  426         V_ifindex_table[idx] = ifp;
  427 }
  428 
  429 struct ifaddr *
  430 ifaddr_byindex(u_short idx)
  431 {
  432         struct ifnet *ifp;
  433         struct ifaddr *ifa = NULL;
  434 
  435         IFNET_RLOCK_NOSLEEP();
  436         ifp = ifnet_byindex_locked(idx);
  437         if (ifp != NULL && (ifa = ifp->if_addr) != NULL)
  438                 ifa_ref(ifa);
  439         IFNET_RUNLOCK_NOSLEEP();
  440         return (ifa);
  441 }
  442 
  443 /*
  444  * Network interface utility routines.
  445  *
  446  * Routines with ifa_ifwith* names take sockaddr *'s as
  447  * parameters.
  448  */
  449 
  450 static void
  451 vnet_if_init(const void *unused __unused)
  452 {
  453         void *old;
  454 
  455         CK_STAILQ_INIT(&V_ifnet);
  456         CK_STAILQ_INIT(&V_ifg_head);
  457         IFNET_WLOCK();
  458         old = if_grow();                                /* create initial table */
  459         IFNET_WUNLOCK();
  460         epoch_wait_preempt(net_epoch_preempt);
  461         free(old, M_IFNET);
  462         vnet_if_clone_init();
  463 }
  464 VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_if_init,
  465     NULL);
  466 
  467 #ifdef VIMAGE
  468 static void
  469 vnet_if_uninit(const void *unused __unused)
  470 {
  471 
  472         VNET_ASSERT(CK_STAILQ_EMPTY(&V_ifnet), ("%s:%d tailq &V_ifnet=%p "
  473             "not empty", __func__, __LINE__, &V_ifnet));
  474         VNET_ASSERT(CK_STAILQ_EMPTY(&V_ifg_head), ("%s:%d tailq &V_ifg_head=%p "
  475             "not empty", __func__, __LINE__, &V_ifg_head));
  476 
  477         free((caddr_t)V_ifindex_table, M_IFNET);
  478 }
  479 VNET_SYSUNINIT(vnet_if_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST,
  480     vnet_if_uninit, NULL);
  481 #endif
  482 
  483 static void
  484 if_link_ifnet(struct ifnet *ifp)
  485 {
  486 
  487         IFNET_WLOCK();
  488         CK_STAILQ_INSERT_TAIL(&V_ifnet, ifp, if_link);
  489 #ifdef VIMAGE
  490         curvnet->vnet_ifcnt++;
  491 #endif
  492         IFNET_WUNLOCK();
  493 }
  494 
  495 static bool
  496 if_unlink_ifnet(struct ifnet *ifp, bool vmove)
  497 {
  498         struct ifnet *iter;
  499         int found = 0;
  500 
  501         IFNET_WLOCK();
  502         CK_STAILQ_FOREACH(iter, &V_ifnet, if_link)
  503                 if (iter == ifp) {
  504                         CK_STAILQ_REMOVE(&V_ifnet, ifp, ifnet, if_link);
  505                         if (!vmove)
  506                                 ifp->if_flags |= IFF_DYING;
  507                         found = 1;
  508                         break;
  509                 }
  510 #ifdef VIMAGE
  511         curvnet->vnet_ifcnt--;
  512 #endif
  513         IFNET_WUNLOCK();
  514 
  515         return (found);
  516 }
  517 
  518 #ifdef VIMAGE
  519 static void
  520 vnet_if_return(const void *unused __unused)
  521 {
  522         struct ifnet *ifp, *nifp;
  523         struct ifnet **pending;
  524         int found, i;
  525 
  526         i = 0;
  527 
  528         /*
  529          * We need to protect our access to the V_ifnet tailq. Ordinarily we'd
  530          * enter NET_EPOCH, but that's not possible, because if_vmove() calls
  531          * if_detach_internal(), which waits for NET_EPOCH callbacks to
  532          * complete. We can't do that from within NET_EPOCH.
  533          *
  534          * However, we can also use the IFNET_xLOCK, which is the V_ifnet
  535          * read/write lock. We cannot hold the lock as we call if_vmove()
  536          * though, as that presents LOR w.r.t ifnet_sx, in_multi_sx and iflib
  537          * ctx lock.
  538          */
  539         IFNET_WLOCK();
  540 
  541         pending = malloc(sizeof(struct ifnet *) * curvnet->vnet_ifcnt,
  542             M_IFNET, M_WAITOK | M_ZERO);
  543 
  544         /* Return all inherited interfaces to their parent vnets. */
  545         CK_STAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) {
  546                 if (ifp->if_home_vnet != ifp->if_vnet) {
  547                         found = if_unlink_ifnet(ifp, true);
  548                         MPASS(found);
  549 
  550                         pending[i++] = ifp;
  551                 }
  552         }
  553         IFNET_WUNLOCK();
  554 
  555         for (int j = 0; j < i; j++) {
  556                 if_vmove(pending[j], pending[j]->if_home_vnet);
  557         }
  558 
  559         free(pending, M_IFNET);
  560 }
  561 VNET_SYSUNINIT(vnet_if_return, SI_SUB_VNET_DONE, SI_ORDER_ANY,
  562     vnet_if_return, NULL);
  563 #endif
  564 
  565 
  566 static void *
  567 if_grow(void)
  568 {
  569         int oldlim;
  570         u_int n;
  571         struct ifnet **e;
  572         void *old;
  573 
  574         old = NULL;
  575         IFNET_WLOCK_ASSERT();
  576         oldlim = V_if_indexlim;
  577         IFNET_WUNLOCK();
  578         n = (oldlim << 1) * sizeof(*e);
  579         e = malloc(n, M_IFNET, M_WAITOK | M_ZERO);
  580         IFNET_WLOCK();
  581         if (V_if_indexlim != oldlim) {
  582                 free(e, M_IFNET);
  583                 return (NULL);
  584         }
  585         if (V_ifindex_table != NULL) {
  586                 memcpy((caddr_t)e, (caddr_t)V_ifindex_table, n/2);
  587                 old = V_ifindex_table;
  588         }
  589         V_if_indexlim <<= 1;
  590         V_ifindex_table = e;
  591         return (old);
  592 }
  593 
  594 /*
  595  * Allocate a struct ifnet and an index for an interface.  A layer 2
  596  * common structure will also be allocated if an allocation routine is
  597  * registered for the passed type.
  598  */
  599 struct ifnet *
  600 if_alloc(u_char type)
  601 {
  602         struct ifnet *ifp;
  603         u_short idx;
  604         void *old;
  605 
  606         ifp = malloc(sizeof(struct ifnet), M_IFNET, M_WAITOK|M_ZERO);
  607  restart:
  608         IFNET_WLOCK();
  609         idx = ifindex_alloc(&old);
  610         if (__predict_false(idx == USHRT_MAX)) {
  611                 IFNET_WUNLOCK();
  612                 epoch_wait_preempt(net_epoch_preempt);
  613                 free(old, M_IFNET);
  614                 goto restart;
  615         }
  616         ifnet_setbyindex(idx, IFNET_HOLD);
  617         IFNET_WUNLOCK();
  618         ifp->if_index = idx;
  619         ifp->if_type = type;
  620         ifp->if_alloctype = type;
  621 #ifdef VIMAGE
  622         ifp->if_vnet = curvnet;
  623 #endif
  624         if (if_com_alloc[type] != NULL) {
  625                 ifp->if_l2com = if_com_alloc[type](type, ifp);
  626                 if (ifp->if_l2com == NULL) {
  627                         free(ifp, M_IFNET);
  628                         ifindex_free(idx);
  629                         return (NULL);
  630                 }
  631         }
  632 
  633         IF_ADDR_LOCK_INIT(ifp);
  634         TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp);
  635         ifp->if_afdata_initialized = 0;
  636         IF_AFDATA_LOCK_INIT(ifp);
  637         CK_STAILQ_INIT(&ifp->if_addrhead);
  638         CK_STAILQ_INIT(&ifp->if_multiaddrs);
  639         CK_STAILQ_INIT(&ifp->if_groups);
  640 #ifdef MAC
  641         mac_ifnet_init(ifp);
  642 #endif
  643         ifq_init(&ifp->if_snd, ifp);
  644 
  645         refcount_init(&ifp->if_refcount, 1);    /* Index reference. */
  646         for (int i = 0; i < IFCOUNTERS; i++)
  647                 ifp->if_counters[i] = counter_u64_alloc(M_WAITOK);
  648         ifp->if_get_counter = if_get_counter_default;
  649         ifp->if_pcp = IFNET_PCP_NONE;
  650         ifnet_setbyindex(ifp->if_index, ifp);
  651         return (ifp);
  652 }
  653 
  654 /*
  655  * Do the actual work of freeing a struct ifnet, and layer 2 common
  656  * structure.  This call is made when the last reference to an
  657  * interface is released.
  658  */
  659 static void
  660 if_free_internal(struct ifnet *ifp)
  661 {
  662 
  663         KASSERT((ifp->if_flags & IFF_DYING),
  664             ("if_free_internal: interface not dying"));
  665 
  666         if (if_com_free[ifp->if_alloctype] != NULL)
  667                 if_com_free[ifp->if_alloctype](ifp->if_l2com,
  668                     ifp->if_alloctype);
  669 
  670 #ifdef MAC
  671         mac_ifnet_destroy(ifp);
  672 #endif /* MAC */
  673         IF_AFDATA_DESTROY(ifp);
  674         IF_ADDR_LOCK_DESTROY(ifp);
  675         ifq_delete(&ifp->if_snd);
  676 
  677         for (int i = 0; i < IFCOUNTERS; i++)
  678                 counter_u64_free(ifp->if_counters[i]);
  679 
  680         free(ifp->if_description, M_IFDESCR);
  681         free(ifp->if_hw_addr, M_IFADDR);
  682         free(ifp, M_IFNET);
  683 }
  684 
  685 static void
  686 if_destroy(epoch_context_t ctx)
  687 {
  688         struct ifnet *ifp;
  689 
  690         ifp = __containerof(ctx, struct ifnet, if_epoch_ctx);
  691         if_free_internal(ifp);
  692 }
  693 
  694 /*
  695  * Deregister an interface and free the associated storage.
  696  */
  697 void
  698 if_free(struct ifnet *ifp)
  699 {
  700 
  701         ifp->if_flags |= IFF_DYING;                     /* XXX: Locking */
  702 
  703         CURVNET_SET_QUIET(ifp->if_vnet);
  704         IFNET_WLOCK();
  705         KASSERT(ifp == ifnet_byindex_locked(ifp->if_index),
  706             ("%s: freeing unallocated ifnet", ifp->if_xname));
  707 
  708         ifindex_free_locked(ifp->if_index);
  709         IFNET_WUNLOCK();
  710 
  711         if (refcount_release(&ifp->if_refcount))
  712                 epoch_call(net_epoch_preempt, &ifp->if_epoch_ctx, if_destroy);
  713         CURVNET_RESTORE();
  714 }
  715 
  716 /*
  717  * Interfaces to keep an ifnet type-stable despite the possibility of the
  718  * driver calling if_free().  If there are additional references, we defer
  719  * freeing the underlying data structure.
  720  */
  721 void
  722 if_ref(struct ifnet *ifp)
  723 {
  724 
  725         /* We don't assert the ifnet list lock here, but arguably should. */
  726         refcount_acquire(&ifp->if_refcount);
  727 }
  728 
  729 void
  730 if_rele(struct ifnet *ifp)
  731 {
  732 
  733         if (!refcount_release(&ifp->if_refcount))
  734                 return;
  735         epoch_call(net_epoch_preempt, &ifp->if_epoch_ctx, if_destroy);
  736 }
  737 
  738 void
  739 ifq_init(struct ifaltq *ifq, struct ifnet *ifp)
  740 {
  741         
  742         mtx_init(&ifq->ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF);
  743 
  744         if (ifq->ifq_maxlen == 0) 
  745                 ifq->ifq_maxlen = ifqmaxlen;
  746 
  747         ifq->altq_type = 0;
  748         ifq->altq_disc = NULL;
  749         ifq->altq_flags &= ALTQF_CANTCHANGE;
  750         ifq->altq_tbr  = NULL;
  751         ifq->altq_ifp  = ifp;
  752 }
  753 
  754 void
  755 ifq_delete(struct ifaltq *ifq)
  756 {
  757         mtx_destroy(&ifq->ifq_mtx);
  758 }
  759 
  760 /*
  761  * Perform generic interface initialization tasks and attach the interface
  762  * to the list of "active" interfaces.  If vmove flag is set on entry
  763  * to if_attach_internal(), perform only a limited subset of initialization
  764  * tasks, given that we are moving from one vnet to another an ifnet which
  765  * has already been fully initialized.
  766  *
  767  * Note that if_detach_internal() removes group membership unconditionally
  768  * even when vmove flag is set, and if_attach_internal() adds only IFG_ALL.
  769  * Thus, when if_vmove() is applied to a cloned interface, group membership
  770  * is lost while a cloned one always joins a group whose name is
  771  * ifc->ifc_name.  To recover this after if_detach_internal() and
  772  * if_attach_internal(), the cloner should be specified to
  773  * if_attach_internal() via ifc.  If it is non-NULL, if_attach_internal()
  774  * attempts to join a group whose name is ifc->ifc_name.
  775  *
  776  * XXX:
  777  *  - The decision to return void and thus require this function to
  778  *    succeed is questionable.
  779  *  - We should probably do more sanity checking.  For instance we don't
  780  *    do anything to ensure if_xname is unique or non-empty.
  781  */
  782 void
  783 if_attach(struct ifnet *ifp)
  784 {
  785 
  786         if_attach_internal(ifp, 0, NULL);
  787 }
  788 
  789 /*
  790  * Compute the least common TSO limit.
  791  */
  792 void
  793 if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *pmax)
  794 {
  795         /*
  796          * 1) If there is no limit currently, take the limit from
  797          * the network adapter.
  798          *
  799          * 2) If the network adapter has a limit below the current
  800          * limit, apply it.
  801          */
  802         if (pmax->tsomaxbytes == 0 || (ifp->if_hw_tsomax != 0 &&
  803             ifp->if_hw_tsomax < pmax->tsomaxbytes)) {
  804                 pmax->tsomaxbytes = ifp->if_hw_tsomax;
  805         }
  806         if (pmax->tsomaxsegcount == 0 || (ifp->if_hw_tsomaxsegcount != 0 &&
  807             ifp->if_hw_tsomaxsegcount < pmax->tsomaxsegcount)) {
  808                 pmax->tsomaxsegcount = ifp->if_hw_tsomaxsegcount;
  809         }
  810         if (pmax->tsomaxsegsize == 0 || (ifp->if_hw_tsomaxsegsize != 0 &&
  811             ifp->if_hw_tsomaxsegsize < pmax->tsomaxsegsize)) {
  812                 pmax->tsomaxsegsize = ifp->if_hw_tsomaxsegsize;
  813         }
  814 }
  815 
  816 /*
  817  * Update TSO limit of a network adapter.
  818  *
  819  * Returns zero if no change. Else non-zero.
  820  */
  821 int
  822 if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *pmax)
  823 {
  824         int retval = 0;
  825         if (ifp->if_hw_tsomax != pmax->tsomaxbytes) {
  826                 ifp->if_hw_tsomax = pmax->tsomaxbytes;
  827                 retval++;
  828         }
  829         if (ifp->if_hw_tsomaxsegsize != pmax->tsomaxsegsize) {
  830                 ifp->if_hw_tsomaxsegsize = pmax->tsomaxsegsize;
  831                 retval++;
  832         }
  833         if (ifp->if_hw_tsomaxsegcount != pmax->tsomaxsegcount) {
  834                 ifp->if_hw_tsomaxsegcount = pmax->tsomaxsegcount;
  835                 retval++;
  836         }
  837         return (retval);
  838 }
  839 
  840 static void
  841 if_attach_internal(struct ifnet *ifp, int vmove, struct if_clone *ifc)
  842 {
  843         unsigned socksize, ifasize;
  844         int namelen, masklen;
  845         struct sockaddr_dl *sdl;
  846         struct ifaddr *ifa;
  847 
  848         if (ifp->if_index == 0 || ifp != ifnet_byindex(ifp->if_index))
  849                 panic ("%s: BUG: if_attach called without if_alloc'd input()\n",
  850                     ifp->if_xname);
  851 
  852 #ifdef VIMAGE
  853         ifp->if_vnet = curvnet;
  854         if (ifp->if_home_vnet == NULL)
  855                 ifp->if_home_vnet = curvnet;
  856 #endif
  857 
  858         if_addgroup(ifp, IFG_ALL);
  859 
  860         /* Restore group membership for cloned interfaces. */
  861         if (vmove && ifc != NULL)
  862                 if_clone_addgroup(ifp, ifc);
  863 
  864         getmicrotime(&ifp->if_lastchange);
  865         ifp->if_epoch = time_uptime;
  866 
  867         KASSERT((ifp->if_transmit == NULL && ifp->if_qflush == NULL) ||
  868             (ifp->if_transmit != NULL && ifp->if_qflush != NULL),
  869             ("transmit and qflush must both either be set or both be NULL"));
  870         if (ifp->if_transmit == NULL) {
  871                 ifp->if_transmit = if_transmit;
  872                 ifp->if_qflush = if_qflush;
  873         }
  874         if (ifp->if_input == NULL)
  875                 ifp->if_input = if_input_default;
  876 
  877         if (ifp->if_requestencap == NULL)
  878                 ifp->if_requestencap = if_requestencap_default;
  879 
  880         if (!vmove) {
  881 #ifdef MAC
  882                 mac_ifnet_create(ifp);
  883 #endif
  884 
  885                 /*
  886                  * Create a Link Level name for this device.
  887                  */
  888                 namelen = strlen(ifp->if_xname);
  889                 /*
  890                  * Always save enough space for any possiable name so we
  891                  * can do a rename in place later.
  892                  */
  893                 masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ;
  894                 socksize = masklen + ifp->if_addrlen;
  895                 if (socksize < sizeof(*sdl))
  896                         socksize = sizeof(*sdl);
  897                 socksize = roundup2(socksize, sizeof(long));
  898                 ifasize = sizeof(*ifa) + 2 * socksize;
  899                 ifa = ifa_alloc(ifasize, M_WAITOK);
  900                 sdl = (struct sockaddr_dl *)(ifa + 1);
  901                 sdl->sdl_len = socksize;
  902                 sdl->sdl_family = AF_LINK;
  903                 bcopy(ifp->if_xname, sdl->sdl_data, namelen);
  904                 sdl->sdl_nlen = namelen;
  905                 sdl->sdl_index = ifp->if_index;
  906                 sdl->sdl_type = ifp->if_type;
  907                 ifp->if_addr = ifa;
  908                 ifa->ifa_ifp = ifp;
  909                 ifa->ifa_rtrequest = link_rtrequest;
  910                 ifa->ifa_addr = (struct sockaddr *)sdl;
  911                 sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
  912                 ifa->ifa_netmask = (struct sockaddr *)sdl;
  913                 sdl->sdl_len = masklen;
  914                 while (namelen != 0)
  915                         sdl->sdl_data[--namelen] = 0xff;
  916                 CK_STAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
  917                 /* Reliably crash if used uninitialized. */
  918                 ifp->if_broadcastaddr = NULL;
  919 
  920                 if (ifp->if_type == IFT_ETHER) {
  921                         ifp->if_hw_addr = malloc(ifp->if_addrlen, M_IFADDR,
  922                             M_WAITOK | M_ZERO);
  923                 }
  924 
  925 #if defined(INET) || defined(INET6)
  926                 /* Use defaults for TSO, if nothing is set */
  927                 if (ifp->if_hw_tsomax == 0 &&
  928                     ifp->if_hw_tsomaxsegcount == 0 &&
  929                     ifp->if_hw_tsomaxsegsize == 0) {
  930                         /*
  931                          * The TSO defaults needs to be such that an
  932                          * NFS mbuf list of 35 mbufs totalling just
  933                          * below 64K works and that a chain of mbufs
  934                          * can be defragged into at most 32 segments:
  935                          */
  936                         ifp->if_hw_tsomax = min(IP_MAXPACKET, (32 * MCLBYTES) -
  937                             (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
  938                         ifp->if_hw_tsomaxsegcount = 35;
  939                         ifp->if_hw_tsomaxsegsize = 2048;        /* 2K */
  940 
  941                         /* XXX some drivers set IFCAP_TSO after ethernet attach */
  942                         if (ifp->if_capabilities & IFCAP_TSO) {
  943                                 if_printf(ifp, "Using defaults for TSO: %u/%u/%u\n",
  944                                     ifp->if_hw_tsomax,
  945                                     ifp->if_hw_tsomaxsegcount,
  946                                     ifp->if_hw_tsomaxsegsize);
  947                         }
  948                 }
  949 #endif
  950         }
  951 #ifdef VIMAGE
  952         else {
  953                 /*
  954                  * Update the interface index in the link layer address
  955                  * of the interface.
  956                  */
  957                 for (ifa = ifp->if_addr; ifa != NULL;
  958                     ifa = CK_STAILQ_NEXT(ifa, ifa_link)) {
  959                         if (ifa->ifa_addr->sa_family == AF_LINK) {
  960                                 sdl = (struct sockaddr_dl *)ifa->ifa_addr;
  961                                 sdl->sdl_index = ifp->if_index;
  962                         }
  963                 }
  964         }
  965 #endif
  966 
  967         if_link_ifnet(ifp);
  968 
  969         if (domain_init_status >= 2)
  970                 if_attachdomain1(ifp);
  971 
  972         EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
  973         if (IS_DEFAULT_VNET(curvnet))
  974                 devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);
  975 
  976         /* Announce the interface. */
  977         rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
  978 }
  979 
  980 static void
  981 if_epochalloc(void *dummy __unused)
  982 {
  983 
  984         net_epoch_preempt = epoch_alloc(EPOCH_PREEMPT);
  985         net_epoch = epoch_alloc(0);
  986 }
  987 SYSINIT(ifepochalloc, SI_SUB_EPOCH, SI_ORDER_ANY, if_epochalloc, NULL);
  988 
  989 static void
  990 if_attachdomain(void *dummy)
  991 {
  992         struct ifnet *ifp;
  993 
  994         CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link)
  995                 if_attachdomain1(ifp);
  996 }
  997 SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND,
  998     if_attachdomain, NULL);
  999 
 1000 static void
 1001 if_attachdomain1(struct ifnet *ifp)
 1002 {
 1003         struct domain *dp;
 1004 
 1005         /*
 1006          * Since dp->dom_ifattach calls malloc() with M_WAITOK, we
 1007          * cannot lock ifp->if_afdata initialization, entirely.
 1008          */
 1009         IF_AFDATA_LOCK(ifp);
 1010         if (ifp->if_afdata_initialized >= domain_init_status) {
 1011                 IF_AFDATA_UNLOCK(ifp);
 1012                 log(LOG_WARNING, "%s called more than once on %s\n",
 1013                     __func__, ifp->if_xname);
 1014                 return;
 1015         }
 1016         ifp->if_afdata_initialized = domain_init_status;
 1017         IF_AFDATA_UNLOCK(ifp);
 1018 
 1019         /* address family dependent data region */
 1020         bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
 1021         for (dp = domains; dp; dp = dp->dom_next) {
 1022                 if (dp->dom_ifattach)
 1023                         ifp->if_afdata[dp->dom_family] =
 1024                             (*dp->dom_ifattach)(ifp);
 1025         }
 1026 }
 1027 
 1028 /*
 1029  * Remove any unicast or broadcast network addresses from an interface.
 1030  */
 1031 void
 1032 if_purgeaddrs(struct ifnet *ifp)
 1033 {
 1034         struct ifaddr *ifa;
 1035 
 1036         while (1) {
 1037                 NET_EPOCH_ENTER();
 1038                 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 1039                         if (ifa->ifa_addr->sa_family != AF_LINK)
 1040                                 break;
 1041                 }
 1042                 NET_EPOCH_EXIT();
 1043 
 1044                 if (ifa == NULL)
 1045                         break;
 1046 #ifdef INET
 1047                 /* XXX: Ugly!! ad hoc just for INET */
 1048                 if (ifa->ifa_addr->sa_family == AF_INET) {
 1049                         struct ifaliasreq ifr;
 1050 
 1051                         bzero(&ifr, sizeof(ifr));
 1052                         ifr.ifra_addr = *ifa->ifa_addr;
 1053                         if (ifa->ifa_dstaddr)
 1054                                 ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
 1055                         if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
 1056                             NULL) == 0)
 1057                                 continue;
 1058                 }
 1059 #endif /* INET */
 1060 #ifdef INET6
 1061                 if (ifa->ifa_addr->sa_family == AF_INET6) {
 1062                         in6_purgeaddr(ifa);
 1063                         /* ifp_addrhead is already updated */
 1064                         continue;
 1065                 }
 1066 #endif /* INET6 */
 1067                 IF_ADDR_WLOCK(ifp);
 1068                 CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link);
 1069                 IF_ADDR_WUNLOCK(ifp);
 1070                 ifa_free(ifa);
 1071         }
 1072 }
 1073 
 1074 /*
 1075  * Remove any multicast network addresses from an interface when an ifnet
 1076  * is going away.
 1077  */
 1078 static void
 1079 if_purgemaddrs(struct ifnet *ifp)
 1080 {
 1081         struct ifmultiaddr *ifma;
 1082 
 1083         IF_ADDR_WLOCK(ifp);
 1084         while (!CK_STAILQ_EMPTY(&ifp->if_multiaddrs)) {
 1085                 ifma = CK_STAILQ_FIRST(&ifp->if_multiaddrs);
 1086                 CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link);
 1087                 if_delmulti_locked(ifp, ifma, 1);
 1088         }
 1089         IF_ADDR_WUNLOCK(ifp);
 1090 }
 1091 
 1092 /*
 1093  * Detach an interface, removing it from the list of "active" interfaces.
 1094  * If vmove flag is set on entry to if_detach_internal(), perform only a
 1095  * limited subset of cleanup tasks, given that we are moving an ifnet from
 1096  * one vnet to another, where it must be fully operational.
 1097  *
 1098  * XXXRW: There are some significant questions about event ordering, and
 1099  * how to prevent things from starting to use the interface during detach.
 1100  */
 1101 void
 1102 if_detach(struct ifnet *ifp)
 1103 {
 1104         bool found;
 1105 
 1106         CURVNET_SET_QUIET(ifp->if_vnet);
 1107         found = if_unlink_ifnet(ifp, false);
 1108         if (found) {
 1109                 sx_xlock(&ifnet_detach_sxlock);
 1110                 if_detach_internal(ifp, 0, NULL);
 1111                 sx_xunlock(&ifnet_detach_sxlock);
 1112         }
 1113         CURVNET_RESTORE();
 1114 }
 1115 
 1116 /*
 1117  * The vmove flag, if set, indicates that we are called from a callpath
 1118  * that is moving an interface to a different vnet instance.
 1119  *
 1120  * The shutdown flag, if set, indicates that we are called in the
 1121  * process of shutting down a vnet instance.  Currently only the
 1122  * vnet_if_return SYSUNINIT function sets it.  Note: we can be called
 1123  * on a vnet instance shutdown without this flag being set, e.g., when
 1124  * the cloned interfaces are destoyed as first thing of teardown.
 1125  */
 1126 static int
 1127 if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp)
 1128 {
 1129         struct ifaddr *ifa;
 1130         int i;
 1131         struct domain *dp;
 1132 #ifdef VIMAGE
 1133         int shutdown;
 1134 
 1135         shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET &&
 1136                  ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
 1137 #endif
 1138 
 1139         /*
 1140          * At this point we know the interface still was on the ifnet list
 1141          * and we removed it so we are in a stable state.
 1142          */
 1143         epoch_wait_preempt(net_epoch_preempt);
 1144 
 1145         /*
 1146          * Ensure all pending EPOCH(9) callbacks have been executed. This
 1147          * fixes issues about late destruction of multicast options
 1148          * which lead to leave group calls, which in turn access the
 1149          * belonging ifnet structure:
 1150          */
 1151         epoch_drain_callbacks(net_epoch_preempt);
 1152 
 1153         /*
 1154          * In any case (destroy or vmove) detach us from the groups
 1155          * and remove/wait for pending events on the taskq.
 1156          * XXX-BZ in theory an interface could still enqueue a taskq change?
 1157          */
 1158         if_delgroups(ifp);
 1159 
 1160         taskqueue_drain(taskqueue_swi, &ifp->if_linktask);
 1161 
 1162         /*
 1163          * Check if this is a cloned interface or not. Must do even if
 1164          * shutting down as a if_vmove_reclaim() would move the ifp and
 1165          * the if_clone_addgroup() will have a corrupted string overwise
 1166          * from a gibberish pointer.
 1167          */
 1168         if (vmove && ifcp != NULL)
 1169                 *ifcp = if_clone_findifc(ifp);
 1170 
 1171         if_down(ifp);
 1172 
 1173 #ifdef VIMAGE
 1174         /*
 1175          * On VNET shutdown abort here as the stack teardown will do all
 1176          * the work top-down for us.
 1177          */
 1178         if (shutdown) {
 1179                 /* Give interface users the chance to clean up. */
 1180                 EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
 1181 
 1182                 /*
 1183                  * In case of a vmove we are done here without error.
 1184                  * If we would signal an error it would lead to the same
 1185                  * abort as if we did not find the ifnet anymore.
 1186                  * if_detach() calls us in void context and does not care
 1187                  * about an early abort notification, so life is splendid :)
 1188                  */
 1189                 goto finish_vnet_shutdown;
 1190         }
 1191 #endif
 1192 
 1193         /*
 1194          * At this point we are not tearing down a VNET and are either
 1195          * going to destroy or vmove the interface and have to cleanup
 1196          * accordingly.
 1197          */
 1198 
 1199         /*
 1200          * Remove routes and flush queues.
 1201          */
 1202 #ifdef ALTQ
 1203         if (ALTQ_IS_ENABLED(&ifp->if_snd))
 1204                 altq_disable(&ifp->if_snd);
 1205         if (ALTQ_IS_ATTACHED(&ifp->if_snd))
 1206                 altq_detach(&ifp->if_snd);
 1207 #endif
 1208 
 1209         if_purgeaddrs(ifp);
 1210 
 1211 #ifdef INET
 1212         in_ifdetach(ifp);
 1213 #endif
 1214 
 1215 #ifdef INET6
 1216         /*
 1217          * Remove all IPv6 kernel structs related to ifp.  This should be done
 1218          * before removing routing entries below, since IPv6 interface direct
 1219          * routes are expected to be removed by the IPv6-specific kernel API.
 1220          * Otherwise, the kernel will detect some inconsistency and bark it.
 1221          */
 1222         in6_ifdetach(ifp);
 1223 #endif
 1224         if_purgemaddrs(ifp);
 1225 
 1226         /* Announce that the interface is gone. */
 1227         rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
 1228         EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
 1229         if (IS_DEFAULT_VNET(curvnet))
 1230                 devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);
 1231 
 1232         if (!vmove) {
 1233                 /*
 1234                  * Prevent further calls into the device driver via ifnet.
 1235                  */
 1236                 if_dead(ifp);
 1237 
 1238                 /*
 1239                  * Clean up all addresses.
 1240                  */
 1241                 IF_ADDR_WLOCK(ifp);
 1242                 if (!CK_STAILQ_EMPTY(&ifp->if_addrhead)) {
 1243                         ifa = CK_STAILQ_FIRST(&ifp->if_addrhead);
 1244                         CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link);
 1245                         IF_ADDR_WUNLOCK(ifp);
 1246                         ifa_free(ifa);
 1247                 } else
 1248                         IF_ADDR_WUNLOCK(ifp);
 1249         }
 1250 
 1251         rt_flushifroutes(ifp);
 1252 
 1253 #ifdef VIMAGE
 1254 finish_vnet_shutdown:
 1255 #endif
 1256         /*
 1257          * We cannot hold the lock over dom_ifdetach calls as they might
 1258          * sleep, for example trying to drain a callout, thus open up the
 1259          * theoretical race with re-attaching.
 1260          */
 1261         IF_AFDATA_LOCK(ifp);
 1262         i = ifp->if_afdata_initialized;
 1263         ifp->if_afdata_initialized = 0;
 1264         IF_AFDATA_UNLOCK(ifp);
 1265         for (dp = domains; i > 0 && dp; dp = dp->dom_next) {
 1266                 if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) {
 1267                         (*dp->dom_ifdetach)(ifp,
 1268                             ifp->if_afdata[dp->dom_family]);
 1269                         ifp->if_afdata[dp->dom_family] = NULL;
 1270                 }
 1271         }
 1272 
 1273         return (0);
 1274 }
 1275 
 1276 #ifdef VIMAGE
 1277 /*
 1278  * if_vmove() performs a limited version of if_detach() in current
 1279  * vnet and if_attach()es the ifnet to the vnet specified as 2nd arg.
 1280  * An attempt is made to shrink if_index in current vnet, find an
 1281  * unused if_index in target vnet and calls if_grow() if necessary,
 1282  * and finally find an unused if_xname for the target vnet.
 1283  */
 1284 static void
 1285 if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
 1286 {
 1287         struct if_clone *ifc;
 1288 #ifdef DEV_BPF
 1289         u_int bif_dlt, bif_hdrlen;
 1290 #endif
 1291         void *old;
 1292         int rc;
 1293 
 1294 #ifdef DEV_BPF
 1295         /*
 1296          * if_detach_internal() will call the eventhandler to notify
 1297          * interface departure.  That will detach if_bpf.  We need to
 1298          * safe the dlt and hdrlen so we can re-attach it later.
 1299          */
 1300         bpf_get_bp_params(ifp->if_bpf, &bif_dlt, &bif_hdrlen);
 1301 #endif
 1302 
 1303         /*
 1304          * Detach from current vnet, but preserve LLADDR info, do not
 1305          * mark as dead etc. so that the ifnet can be reattached later.
 1306          * If we cannot find it, we lost the race to someone else.
 1307          */
 1308         rc = if_detach_internal(ifp, 1, &ifc);
 1309         if (rc != 0)
 1310                 return;
 1311 
 1312         /*
 1313          * Unlink the ifnet from ifindex_table[] in current vnet, and shrink
 1314          * the if_index for that vnet if possible.
 1315          *
 1316          * NOTE: IFNET_WLOCK/IFNET_WUNLOCK() are assumed to be unvirtualized,
 1317          * or we'd lock on one vnet and unlock on another.
 1318          */
 1319         IFNET_WLOCK();
 1320         ifindex_free_locked(ifp->if_index);
 1321         IFNET_WUNLOCK();
 1322 
 1323         /*
 1324          * Perform interface-specific reassignment tasks, if provided by
 1325          * the driver.
 1326          */
 1327         if (ifp->if_reassign != NULL)
 1328                 ifp->if_reassign(ifp, new_vnet, NULL);
 1329 
 1330         /*
 1331          * Switch to the context of the target vnet.
 1332          */
 1333         CURVNET_SET_QUIET(new_vnet);
 1334  restart:
 1335         IFNET_WLOCK();
 1336         ifp->if_index = ifindex_alloc(&old);
 1337         if (__predict_false(ifp->if_index == USHRT_MAX)) {
 1338                 IFNET_WUNLOCK();
 1339                 epoch_wait_preempt(net_epoch_preempt);
 1340                 free(old, M_IFNET);
 1341                 goto restart;
 1342         }
 1343         ifnet_setbyindex(ifp->if_index, ifp);
 1344         IFNET_WUNLOCK();
 1345 
 1346         if_attach_internal(ifp, 1, ifc);
 1347 
 1348 #ifdef DEV_BPF
 1349         if (ifp->if_bpf == NULL)
 1350                 bpfattach(ifp, bif_dlt, bif_hdrlen);
 1351 #endif
 1352 
 1353         CURVNET_RESTORE();
 1354 }
 1355 
 1356 /*
 1357  * Move an ifnet to or from another child prison/vnet, specified by the jail id.
 1358  */
 1359 static int
 1360 if_vmove_loan(struct thread *td, struct ifnet *ifp, char *ifname, int jid)
 1361 {
 1362         struct prison *pr;
 1363         struct ifnet *difp;
 1364         int shutdown;
 1365         bool found;
 1366 
 1367         /* Try to find the prison within our visibility. */
 1368         sx_slock(&allprison_lock);
 1369         pr = prison_find_child(td->td_ucred->cr_prison, jid);
 1370         sx_sunlock(&allprison_lock);
 1371         if (pr == NULL)
 1372                 return (ENXIO);
 1373         prison_hold_locked(pr);
 1374         mtx_unlock(&pr->pr_mtx);
 1375 
 1376         /* Do not try to move the iface from and to the same prison. */
 1377         if (pr->pr_vnet == ifp->if_vnet) {
 1378                 prison_free(pr);
 1379                 return (EEXIST);
 1380         }
 1381 
 1382         /* Make sure the named iface does not exists in the dst. prison/vnet. */
 1383         /* XXX Lock interfaces to avoid races. */
 1384         CURVNET_SET_QUIET(pr->pr_vnet);
 1385         difp = ifunit(ifname);
 1386         if (difp != NULL) {
 1387                 CURVNET_RESTORE();
 1388                 prison_free(pr);
 1389                 return (EEXIST);
 1390         }
 1391 
 1392         /* Make sure the VNET is stable. */
 1393         shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET &&
 1394                  ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
 1395         if (shutdown) {
 1396                 CURVNET_RESTORE();
 1397                 prison_free(pr);
 1398                 return (EBUSY);
 1399         }
 1400         CURVNET_RESTORE();
 1401 
 1402         found = if_unlink_ifnet(ifp, true);
 1403         MPASS(found);
 1404 
 1405         /* Move the interface into the child jail/vnet. */
 1406         if_vmove(ifp, pr->pr_vnet);
 1407 
 1408         /* Report the new if_xname back to the userland. */
 1409         sprintf(ifname, "%s", ifp->if_xname);
 1410 
 1411         prison_free(pr);
 1412         return (0);
 1413 }
 1414 
 1415 static int
 1416 if_vmove_reclaim(struct thread *td, char *ifname, int jid)
 1417 {
 1418         struct prison *pr;
 1419         struct vnet *vnet_dst;
 1420         struct ifnet *ifp;
 1421         int shutdown;
 1422         bool found;
 1423 
 1424         /* Try to find the prison within our visibility. */
 1425         sx_slock(&allprison_lock);
 1426         pr = prison_find_child(td->td_ucred->cr_prison, jid);
 1427         sx_sunlock(&allprison_lock);
 1428         if (pr == NULL)
 1429                 return (ENXIO);
 1430         prison_hold_locked(pr);
 1431         mtx_unlock(&pr->pr_mtx);
 1432 
 1433         /* Make sure the named iface exists in the source prison/vnet. */
 1434         CURVNET_SET(pr->pr_vnet);
 1435         ifp = ifunit(ifname);           /* XXX Lock to avoid races. */
 1436         if (ifp == NULL) {
 1437                 CURVNET_RESTORE();
 1438                 prison_free(pr);
 1439                 return (ENXIO);
 1440         }
 1441 
 1442         /* Do not try to move the iface from and to the same prison. */
 1443         vnet_dst = TD_TO_VNET(td);
 1444         if (vnet_dst == ifp->if_vnet) {
 1445                 CURVNET_RESTORE();
 1446                 prison_free(pr);
 1447                 return (EEXIST);
 1448         }
 1449 
 1450         /* Make sure the VNET is stable. */
 1451         shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET &&
 1452                  ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
 1453         if (shutdown) {
 1454                 CURVNET_RESTORE();
 1455                 prison_free(pr);
 1456                 return (EBUSY);
 1457         }
 1458 
 1459         /* Get interface back from child jail/vnet. */
 1460         found = if_unlink_ifnet(ifp, true);
 1461         MPASS(found);
 1462         if_vmove(ifp, vnet_dst);
 1463         CURVNET_RESTORE();
 1464 
 1465         /* Report the new if_xname back to the userland. */
 1466         sprintf(ifname, "%s", ifp->if_xname);
 1467 
 1468         prison_free(pr);
 1469         return (0);
 1470 }
 1471 #endif /* VIMAGE */
 1472 
 1473 /*
 1474  * Add a group to an interface
 1475  */
 1476 int
 1477 if_addgroup(struct ifnet *ifp, const char *groupname)
 1478 {
 1479         struct ifg_list         *ifgl;
 1480         struct ifg_group        *ifg = NULL;
 1481         struct ifg_member       *ifgm;
 1482         int                      new = 0;
 1483 
 1484         if (groupname[0] && groupname[strlen(groupname) - 1] >= '' &&
 1485             groupname[strlen(groupname) - 1] <= '9')
 1486                 return (EINVAL);
 1487 
 1488         IFNET_WLOCK();
 1489         CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
 1490                 if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) {
 1491                         IFNET_WUNLOCK();
 1492                         return (EEXIST);
 1493                 }
 1494 
 1495         if ((ifgl = malloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL) {
 1496                 IFNET_WUNLOCK();
 1497                 return (ENOMEM);
 1498         }
 1499 
 1500         if ((ifgm = malloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
 1501                 free(ifgl, M_TEMP);
 1502                 IFNET_WUNLOCK();
 1503                 return (ENOMEM);
 1504         }
 1505 
 1506         CK_STAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
 1507                 if (!strcmp(ifg->ifg_group, groupname))
 1508                         break;
 1509 
 1510         if (ifg == NULL) {
 1511                 if ((ifg = malloc(sizeof(*ifg), M_TEMP, M_NOWAIT)) == NULL) {
 1512                         free(ifgl, M_TEMP);
 1513                         free(ifgm, M_TEMP);
 1514                         IFNET_WUNLOCK();
 1515                         return (ENOMEM);
 1516                 }
 1517                 strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
 1518                 ifg->ifg_refcnt = 0;
 1519                 CK_STAILQ_INIT(&ifg->ifg_members);
 1520                 CK_STAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next);
 1521                 new = 1;
 1522         }
 1523 
 1524         ifg->ifg_refcnt++;
 1525         ifgl->ifgl_group = ifg;
 1526         ifgm->ifgm_ifp = ifp;
 1527 
 1528         IF_ADDR_WLOCK(ifp);
 1529         CK_STAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
 1530         CK_STAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
 1531         IF_ADDR_WUNLOCK(ifp);
 1532 
 1533         IFNET_WUNLOCK();
 1534 
 1535         if (new)
 1536                 EVENTHANDLER_INVOKE(group_attach_event, ifg);
 1537         EVENTHANDLER_INVOKE(group_change_event, groupname);
 1538 
 1539         return (0);
 1540 }
 1541 
 1542 /*
 1543  * Helper function to remove a group out of an interface.  Expects the global
 1544  * ifnet lock to be write-locked, and drops it before returning.
 1545  */
 1546 static void
 1547 _if_delgroup_locked(struct ifnet *ifp, struct ifg_list *ifgl,
 1548     const char *groupname)
 1549 {
 1550         struct ifg_member *ifgm;
 1551         bool freeifgl;
 1552 
 1553         IFNET_WLOCK_ASSERT();
 1554 
 1555         IF_ADDR_WLOCK(ifp);
 1556         CK_STAILQ_REMOVE(&ifp->if_groups, ifgl, ifg_list, ifgl_next);
 1557         IF_ADDR_WUNLOCK(ifp);
 1558 
 1559         CK_STAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next) {
 1560                 if (ifgm->ifgm_ifp == ifp) {
 1561                         CK_STAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm,
 1562                             ifg_member, ifgm_next);
 1563                         break;
 1564                 }
 1565         }
 1566 
 1567         if (--ifgl->ifgl_group->ifg_refcnt == 0) {
 1568                 CK_STAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_group,
 1569                     ifg_next);
 1570                 freeifgl = true;
 1571         } else {
 1572                 freeifgl = false;
 1573         }
 1574         IFNET_WUNLOCK();
 1575 
 1576         epoch_wait_preempt(net_epoch_preempt);
 1577         EVENTHANDLER_INVOKE(group_change_event, groupname);
 1578         if (freeifgl) {
 1579                 EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group);
 1580                 free(ifgl->ifgl_group, M_TEMP);
 1581         }
 1582         free(ifgm, M_TEMP);
 1583         free(ifgl, M_TEMP);
 1584 }
 1585 
 1586 /*
 1587  * Remove a group from an interface
 1588  */
 1589 int
 1590 if_delgroup(struct ifnet *ifp, const char *groupname)
 1591 {
 1592         struct ifg_list *ifgl;
 1593 
 1594         IFNET_WLOCK();
 1595         CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
 1596                 if (strcmp(ifgl->ifgl_group->ifg_group, groupname) == 0)
 1597                         break;
 1598         if (ifgl == NULL) {
 1599                 IFNET_WUNLOCK();
 1600                 return (ENOENT);
 1601         }
 1602 
 1603         _if_delgroup_locked(ifp, ifgl, groupname);
 1604 
 1605         return (0);
 1606 }
 1607 
 1608 /*
 1609  * Remove an interface from all groups
 1610  */
 1611 static void
 1612 if_delgroups(struct ifnet *ifp)
 1613 {
 1614         struct ifg_list *ifgl;
 1615         char groupname[IFNAMSIZ];
 1616 
 1617         IFNET_WLOCK();
 1618         while ((ifgl = CK_STAILQ_FIRST(&ifp->if_groups)) != NULL) {
 1619                 strlcpy(groupname, ifgl->ifgl_group->ifg_group, IFNAMSIZ);
 1620                 _if_delgroup_locked(ifp, ifgl, groupname);
 1621                 IFNET_WLOCK();
 1622         }
 1623         IFNET_WUNLOCK();
 1624 }
 1625 
 1626 static char *
 1627 ifgr_group_get(void *ifgrp)
 1628 {
 1629         union ifgroupreq_union *ifgrup;
 1630 
 1631         ifgrup = ifgrp;
 1632 #ifdef COMPAT_FREEBSD32
 1633         if (SV_CURPROC_FLAG(SV_ILP32))
 1634                 return (&ifgrup->ifgr32.ifgr_ifgru.ifgru_group[0]);
 1635 #endif
 1636         return (&ifgrup->ifgr.ifgr_ifgru.ifgru_group[0]);
 1637 }
 1638 
 1639 static struct ifg_req *
 1640 ifgr_groups_get(void *ifgrp)
 1641 {
 1642         union ifgroupreq_union *ifgrup;
 1643 
 1644         ifgrup = ifgrp;
 1645 #ifdef COMPAT_FREEBSD32
 1646         if (SV_CURPROC_FLAG(SV_ILP32))
 1647                 return ((struct ifg_req *)(uintptr_t)
 1648                     ifgrup->ifgr32.ifgr_ifgru.ifgru_groups);
 1649 #endif
 1650         return (ifgrup->ifgr.ifgr_ifgru.ifgru_groups);
 1651 }
 1652 
 1653 /*
 1654  * Stores all groups from an interface in memory pointed to by ifgr.
 1655  */
 1656 static int
 1657 if_getgroup(struct ifgroupreq *ifgr, struct ifnet *ifp)
 1658 {
 1659         int                      len, error;
 1660         struct ifg_list         *ifgl;
 1661         struct ifg_req           ifgrq, *ifgp;
 1662 
 1663         if (ifgr->ifgr_len == 0) {
 1664                 IF_ADDR_RLOCK(ifp);
 1665                 CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
 1666                         ifgr->ifgr_len += sizeof(struct ifg_req);
 1667                 IF_ADDR_RUNLOCK(ifp);
 1668                 return (0);
 1669         }
 1670 
 1671         len = ifgr->ifgr_len;
 1672         ifgp = ifgr_groups_get(ifgr);
 1673         /* XXX: wire */
 1674         IF_ADDR_RLOCK(ifp);
 1675         CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
 1676                 if (len < sizeof(ifgrq)) {
 1677                         IF_ADDR_RUNLOCK(ifp);
 1678                         return (EINVAL);
 1679                 }
 1680                 bzero(&ifgrq, sizeof ifgrq);
 1681                 strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
 1682                     sizeof(ifgrq.ifgrq_group));
 1683                 if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
 1684                         IF_ADDR_RUNLOCK(ifp);
 1685                         return (error);
 1686                 }
 1687                 len -= sizeof(ifgrq);
 1688                 ifgp++;
 1689         }
 1690         IF_ADDR_RUNLOCK(ifp);
 1691 
 1692         return (0);
 1693 }
 1694 
 1695 /*
 1696  * Stores all members of a group in memory pointed to by igfr
 1697  */
 1698 static int
 1699 if_getgroupmembers(struct ifgroupreq *ifgr)
 1700 {
 1701         struct ifg_group        *ifg;
 1702         struct ifg_member       *ifgm;
 1703         struct ifg_req           ifgrq, *ifgp;
 1704         int                      len, error;
 1705 
 1706         IFNET_RLOCK();
 1707         CK_STAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
 1708                 if (strcmp(ifg->ifg_group, ifgr->ifgr_name) == 0)
 1709                         break;
 1710         if (ifg == NULL) {
 1711                 IFNET_RUNLOCK();
 1712                 return (ENOENT);
 1713         }
 1714 
 1715         if (ifgr->ifgr_len == 0) {
 1716                 CK_STAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
 1717                         ifgr->ifgr_len += sizeof(ifgrq);
 1718                 IFNET_RUNLOCK();
 1719                 return (0);
 1720         }
 1721 
 1722         len = ifgr->ifgr_len;
 1723         ifgp = ifgr_groups_get(ifgr);
 1724         CK_STAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
 1725                 if (len < sizeof(ifgrq)) {
 1726                         IFNET_RUNLOCK();
 1727                         return (EINVAL);
 1728                 }
 1729                 bzero(&ifgrq, sizeof ifgrq);
 1730                 strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
 1731                     sizeof(ifgrq.ifgrq_member));
 1732                 if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
 1733                         IFNET_RUNLOCK();
 1734                         return (error);
 1735                 }
 1736                 len -= sizeof(ifgrq);
 1737                 ifgp++;
 1738         }
 1739         IFNET_RUNLOCK();
 1740 
 1741         return (0);
 1742 }
 1743 
 1744 /*
 1745  * Return counter values from counter(9)s stored in ifnet.
 1746  */
 1747 uint64_t
 1748 if_get_counter_default(struct ifnet *ifp, ift_counter cnt)
 1749 {
 1750 
 1751         KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
 1752 
 1753         return (counter_u64_fetch(ifp->if_counters[cnt]));
 1754 }
 1755 
 1756 /*
 1757  * Increase an ifnet counter. Usually used for counters shared
 1758  * between the stack and a driver, but function supports them all.
 1759  */
 1760 void
 1761 if_inc_counter(struct ifnet *ifp, ift_counter cnt, int64_t inc)
 1762 {
 1763 
 1764         KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
 1765 
 1766         counter_u64_add(ifp->if_counters[cnt], inc);
 1767 }
 1768 
 1769 /*
 1770  * Copy data from ifnet to userland API structure if_data.
 1771  */
 1772 void
 1773 if_data_copy(struct ifnet *ifp, struct if_data *ifd)
 1774 {
 1775 
 1776         ifd->ifi_type = ifp->if_type;
 1777         ifd->ifi_physical = 0;
 1778         ifd->ifi_addrlen = ifp->if_addrlen;
 1779         ifd->ifi_hdrlen = ifp->if_hdrlen;
 1780         ifd->ifi_link_state = ifp->if_link_state;
 1781         ifd->ifi_vhid = 0;
 1782         ifd->ifi_datalen = sizeof(struct if_data);
 1783         ifd->ifi_mtu = ifp->if_mtu;
 1784         ifd->ifi_metric = ifp->if_metric;
 1785         ifd->ifi_baudrate = ifp->if_baudrate;
 1786         ifd->ifi_hwassist = ifp->if_hwassist;
 1787         ifd->ifi_epoch = ifp->if_epoch;
 1788         ifd->ifi_lastchange = ifp->if_lastchange;
 1789 
 1790         ifd->ifi_ipackets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS);
 1791         ifd->ifi_ierrors = ifp->if_get_counter(ifp, IFCOUNTER_IERRORS);
 1792         ifd->ifi_opackets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS);
 1793         ifd->ifi_oerrors = ifp->if_get_counter(ifp, IFCOUNTER_OERRORS);
 1794         ifd->ifi_collisions = ifp->if_get_counter(ifp, IFCOUNTER_COLLISIONS);
 1795         ifd->ifi_ibytes = ifp->if_get_counter(ifp, IFCOUNTER_IBYTES);
 1796         ifd->ifi_obytes = ifp->if_get_counter(ifp, IFCOUNTER_OBYTES);
 1797         ifd->ifi_imcasts = ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS);
 1798         ifd->ifi_omcasts = ifp->if_get_counter(ifp, IFCOUNTER_OMCASTS);
 1799         ifd->ifi_iqdrops = ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS);
 1800         ifd->ifi_oqdrops = ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS);
 1801         ifd->ifi_noproto = ifp->if_get_counter(ifp, IFCOUNTER_NOPROTO);
 1802 }
 1803 
/*
 * Per-CPU state used by the if_addr_rlock()/if_addr_runlock() and
 * if_maddr_rlock()/if_maddr_runlock() wrappers: an epoch tracker and the
 * mutex that is held for as long as that tracker is in use.
 */
struct ifnet_read_lock {
        struct mtx mtx; /* lock protecting tracker below */
        struct epoch_tracker et;
};

/*
 * One instance per CPU for each wrapper pair; the mutexes are set up by
 * ifnet_read_lock_init().
 */
DPCPU_DEFINE_STATIC(struct ifnet_read_lock, ifnet_addr_read_lock);
DPCPU_DEFINE_STATIC(struct ifnet_read_lock, ifnet_maddr_read_lock);
 1811 
 1812 static void
 1813 ifnet_read_lock_init(void __unused *arg)
 1814 {
 1815         struct ifnet_read_lock *pifrl;
 1816         int cpu;
 1817 
 1818         CPU_FOREACH(cpu) {
 1819                 pifrl = DPCPU_ID_PTR(cpu, ifnet_addr_read_lock);
 1820                 mtx_init(&pifrl->mtx, "ifnet_addr_read_lock", NULL, MTX_DEF);
 1821 
 1822                 pifrl = DPCPU_ID_PTR(cpu, ifnet_maddr_read_lock);
 1823                 mtx_init(&pifrl->mtx, "ifnet_maddr_read_lock", NULL, MTX_DEF);
 1824         }
 1825 }
 1826 SYSINIT(ifnet_read_lock_init, SI_SUB_CPU + 1, SI_ORDER_FIRST, &ifnet_read_lock_init, NULL);
 1827 
 1828 /*
 1829  * Wrapper functions for struct ifnet address list locking macros.  These are
 1830  * used by kernel modules to avoid encoding programming interface or binary
 1831  * interface assumptions that may be violated when kernel-internal locking
 1832  * approaches change.
 1833  */
 1834 void
 1835 if_addr_rlock(struct ifnet *ifp)
 1836 {
 1837         struct ifnet_read_lock *pifrl;
 1838 
 1839         sched_pin();
 1840         pifrl = DPCPU_PTR(ifnet_addr_read_lock);
 1841         mtx_lock(&pifrl->mtx);
 1842         epoch_enter_preempt(net_epoch_preempt, &pifrl->et);
 1843 }
 1844 
 1845 void
 1846 if_addr_runlock(struct ifnet *ifp)
 1847 {
 1848         struct ifnet_read_lock *pifrl;
 1849 
 1850         pifrl = DPCPU_PTR(ifnet_addr_read_lock);
 1851 
 1852         epoch_exit_preempt(net_epoch_preempt, &pifrl->et);
 1853         mtx_unlock(&pifrl->mtx);
 1854         sched_unpin();
 1855 }
 1856 
 1857 void
 1858 if_maddr_rlock(if_t ifp)
 1859 {
 1860         struct ifnet_read_lock *pifrl;
 1861 
 1862         sched_pin();
 1863         pifrl = DPCPU_PTR(ifnet_maddr_read_lock);
 1864         mtx_lock(&pifrl->mtx);
 1865         epoch_enter_preempt(net_epoch_preempt, &pifrl->et);
 1866 }
 1867 
 1868 void
 1869 if_maddr_runlock(if_t ifp)
 1870 {
 1871         struct ifnet_read_lock *pifrl;
 1872 
 1873         pifrl = DPCPU_PTR(ifnet_maddr_read_lock);
 1874 
 1875         epoch_exit_preempt(net_epoch_preempt, &pifrl->et);
 1876         mtx_unlock(&pifrl->mtx);
 1877         sched_unpin();
 1878 }
 1879 
 1880 /*
 1881  * Initialization, destruction and refcounting functions for ifaddrs.
 1882  */
 1883 struct ifaddr *
 1884 ifa_alloc(size_t size, int flags)
 1885 {
 1886         struct ifaddr *ifa;
 1887 
 1888         KASSERT(size >= sizeof(struct ifaddr),
 1889             ("%s: invalid size %zu", __func__, size));
 1890 
 1891         ifa = malloc(size, M_IFADDR, M_ZERO | flags);
 1892         if (ifa == NULL)
 1893                 return (NULL);
 1894 
 1895         if ((ifa->ifa_opackets = counter_u64_alloc(flags)) == NULL)
 1896                 goto fail;
 1897         if ((ifa->ifa_ipackets = counter_u64_alloc(flags)) == NULL)
 1898                 goto fail;
 1899         if ((ifa->ifa_obytes = counter_u64_alloc(flags)) == NULL)
 1900                 goto fail;
 1901         if ((ifa->ifa_ibytes = counter_u64_alloc(flags)) == NULL)
 1902                 goto fail;
 1903 
 1904         refcount_init(&ifa->ifa_refcnt, 1);
 1905 
 1906         return (ifa);
 1907 
 1908 fail:
 1909         /* free(NULL) is okay */
 1910         counter_u64_free(ifa->ifa_opackets);
 1911         counter_u64_free(ifa->ifa_ipackets);
 1912         counter_u64_free(ifa->ifa_obytes);
 1913         counter_u64_free(ifa->ifa_ibytes);
 1914         free(ifa, M_IFADDR);
 1915 
 1916         return (NULL);
 1917 }
 1918 
/*
 * Take an additional reference on ifa by bumping ifa_refcnt.  The matching
 * release is ifa_free().
 */
void
ifa_ref(struct ifaddr *ifa)
{

        refcount_acquire(&ifa->ifa_refcnt);
}
 1925 
 1926 static void
 1927 ifa_destroy(epoch_context_t ctx)
 1928 {
 1929         struct ifaddr *ifa;
 1930 
 1931         ifa = __containerof(ctx, struct ifaddr, ifa_epoch_ctx);
 1932         counter_u64_free(ifa->ifa_opackets);
 1933         counter_u64_free(ifa->ifa_ipackets);
 1934         counter_u64_free(ifa->ifa_obytes);
 1935         counter_u64_free(ifa->ifa_ibytes);
 1936         free(ifa, M_IFADDR);
 1937 }
 1938 
 1939 void
 1940 ifa_free(struct ifaddr *ifa)
 1941 {
 1942 
 1943         if (refcount_release(&ifa->ifa_refcnt))
 1944                 epoch_call(net_epoch_preempt, &ifa->ifa_epoch_ctx, ifa_destroy);
 1945 }
 1946 
 1947 
 1948 static int
 1949 ifa_maintain_loopback_route(int cmd, const char *otype, struct ifaddr *ifa,
 1950     struct sockaddr *ia)
 1951 {
 1952         int error;
 1953         struct rt_addrinfo info;
 1954         struct sockaddr_dl null_sdl;
 1955         struct ifnet *ifp;
 1956 
 1957         ifp = ifa->ifa_ifp;
 1958 
 1959         bzero(&info, sizeof(info));
 1960         if (cmd != RTM_DELETE)
 1961                 info.rti_ifp = V_loif;
 1962         info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC | RTF_PINNED;
 1963         info.rti_info[RTAX_DST] = ia;
 1964         info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl;
 1965         link_init_sdl(ifp, (struct sockaddr *)&null_sdl, ifp->if_type);
 1966 
 1967         error = rtrequest1_fib(cmd, &info, NULL, ifp->if_fib);
 1968 
 1969         if (error == 0 ||
 1970             (cmd == RTM_ADD && error == EEXIST) ||
 1971             (cmd == RTM_DELETE && (error == ENOENT || error == ESRCH)))
 1972                 return (error);
 1973 
 1974         log(LOG_DEBUG, "%s: %s failed for interface %s: %u\n",
 1975                 __func__, otype, if_name(ifp), error);
 1976 
 1977         return (error);
 1978 }
 1979 
 1980 int
 1981 ifa_add_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
 1982 {
 1983 
 1984         return (ifa_maintain_loopback_route(RTM_ADD, "insertion", ifa, ia));
 1985 }
 1986 
 1987 int
 1988 ifa_del_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
 1989 {
 1990 
 1991         return (ifa_maintain_loopback_route(RTM_DELETE, "deletion", ifa, ia));
 1992 }
 1993 
 1994 int
 1995 ifa_switch_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
 1996 {
 1997 
 1998         return (ifa_maintain_loopback_route(RTM_CHANGE, "switch", ifa, ia));
 1999 }
 2000 
/*
 * XXX: Because sockaddr_dl has deeper structure than the sockaddr
 * structs used to represent other address families, it is necessary
 * to perform a different comparison.
 *
 * sa_dl_equal(a1, a2) is true when both sockaddr_dl have the same sdl_len
 * and the first a1->sdl_alen bytes of their link-level addresses (as
 * returned by CLLADDR) are identical.  Each argument is expanded more than
 * once, so pass expressions without side effects.
 */

#define sa_dl_equal(a1, a2)     \
        ((((const struct sockaddr_dl *)(a1))->sdl_len ==                \
         ((const struct sockaddr_dl *)(a2))->sdl_len) &&                \
         (bcmp(CLLADDR((const struct sockaddr_dl *)(a1)),               \
               CLLADDR((const struct sockaddr_dl *)(a2)),               \
               ((const struct sockaddr_dl *)(a1))->sdl_alen) == 0))
 2013 
 2014 /*
 2015  * Locate an interface based on a complete address.
 2016  */
 2017 /*ARGSUSED*/
 2018 struct ifaddr *
 2019 ifa_ifwithaddr(const struct sockaddr *addr)
 2020 {
 2021         struct ifnet *ifp;
 2022         struct ifaddr *ifa;
 2023 
 2024         MPASS(in_epoch(net_epoch_preempt));
 2025         CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 2026                 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 2027                         if (ifa->ifa_addr->sa_family != addr->sa_family)
 2028                                 continue;
 2029                         if (sa_equal(addr, ifa->ifa_addr)) {
 2030                                 goto done;
 2031                         }
 2032                         /* IP6 doesn't have broadcast */
 2033                         if ((ifp->if_flags & IFF_BROADCAST) &&
 2034                             ifa->ifa_broadaddr &&
 2035                             ifa->ifa_broadaddr->sa_len != 0 &&
 2036                             sa_equal(ifa->ifa_broadaddr, addr)) {
 2037                                 goto done;
 2038                         }
 2039                 }
 2040         }
 2041         ifa = NULL;
 2042 done:
 2043         return (ifa);
 2044 }
 2045 
 2046 int
 2047 ifa_ifwithaddr_check(const struct sockaddr *addr)
 2048 {
 2049         int rc;
 2050 
 2051         NET_EPOCH_ENTER();
 2052         rc = (ifa_ifwithaddr(addr) != NULL);
 2053         NET_EPOCH_EXIT();
 2054         return (rc);
 2055 }
 2056 
 2057 /*
 2058  * Locate an interface based on the broadcast address.
 2059  */
 2060 /* ARGSUSED */
 2061 struct ifaddr *
 2062 ifa_ifwithbroadaddr(const struct sockaddr *addr, int fibnum)
 2063 {
 2064         struct ifnet *ifp;
 2065         struct ifaddr *ifa;
 2066 
 2067         MPASS(in_epoch(net_epoch_preempt));
 2068         CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 2069                 if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
 2070                         continue;
 2071                 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 2072                         if (ifa->ifa_addr->sa_family != addr->sa_family)
 2073                                 continue;
 2074                         if ((ifp->if_flags & IFF_BROADCAST) &&
 2075                             ifa->ifa_broadaddr &&
 2076                             ifa->ifa_broadaddr->sa_len != 0 &&
 2077                             sa_equal(ifa->ifa_broadaddr, addr)) {
 2078                                 goto done;
 2079                         }
 2080                 }
 2081         }
 2082         ifa = NULL;
 2083 done:
 2084         return (ifa);
 2085 }
 2086 
 2087 /*
 2088  * Locate the point to point interface with a given destination address.
 2089  */
 2090 /*ARGSUSED*/
 2091 struct ifaddr *
 2092 ifa_ifwithdstaddr(const struct sockaddr *addr, int fibnum)
 2093 {
 2094         struct ifnet *ifp;
 2095         struct ifaddr *ifa;
 2096 
 2097         MPASS(in_epoch(net_epoch_preempt));
 2098         CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 2099                 if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
 2100                         continue;
 2101                 if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
 2102                         continue;
 2103                 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 2104                         if (ifa->ifa_addr->sa_family != addr->sa_family)
 2105                                 continue;
 2106                         if (ifa->ifa_dstaddr != NULL &&
 2107                             sa_equal(addr, ifa->ifa_dstaddr)) {
 2108                                 goto done;
 2109                         }
 2110                 }
 2111         }
 2112         ifa = NULL;
 2113 done:
 2114         return (ifa);
 2115 }
 2116 
/*
 * Find an interface on a specific network.  If many, choice
 * is most specific found.
 *
 * addr is the address to match.  fibnum limits the search to interfaces in
 * that FIB unless it is RT_ALL_FIBS.  When ignore_ptp is zero, AF_INET
 * addresses on IFF_POINTOPOINT interfaces are matched only by an exact
 * comparison with ifa_dstaddr; otherwise they go through the same netmask
 * comparison as every other address.
 *
 * Returns the chosen ifaddr or NULL.  The caller must be inside
 * net_epoch_preempt (asserted below); no reference is taken on the result.
 */
struct ifaddr *
ifa_ifwithnet(const struct sockaddr *addr, int ignore_ptp, int fibnum)
{
        struct ifnet *ifp;
        struct ifaddr *ifa;
        struct ifaddr *ifa_maybe = NULL;
        u_int af = addr->sa_family;
        const char *addr_data = addr->sa_data, *cplim;

        MPASS(in_epoch(net_epoch_preempt));
        /*
         * AF_LINK addresses can be looked up directly by their index number,
         * so do that if we can.
         */
        if (af == AF_LINK) {
            const struct sockaddr_dl *sdl = (const struct sockaddr_dl *)addr;
            if (sdl->sdl_index && sdl->sdl_index <= V_if_index)
                return (ifaddr_byindex(sdl->sdl_index));
        }

        /*
         * Scan through each interface, looking for ones that have addresses
         * in this address family and the requested fib.
         * NOTE(review): an earlier wording of this comment described taking
         * a reference on ifa_maybe and releasing IF_ADDR_RLOCK() between
         * interfaces; the code below does neither and relies on the net
         * epoch asserted above.
         */
        CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
                if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
                        continue;
                CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
                        const char *cp, *cp2, *cp3;

                        /*
                         * The "next" label sits on this continue so that the
                         * mask comparison further down can skip to the next
                         * address with "goto next".
                         */
                        if (ifa->ifa_addr->sa_family != af)
next:                           continue;
                        if (af == AF_INET && 
                            ifp->if_flags & IFF_POINTOPOINT && !ignore_ptp) {
                                /*
                                 * This is a bit broken as it doesn't
                                 * take into account that the remote end may
                                 * be a single node in the network we are
                                 * looking for.
                                 * The trouble is that we don't know the
                                 * netmask for the remote end.
                                 */
                                if (ifa->ifa_dstaddr != NULL &&
                                    sa_equal(addr, ifa->ifa_dstaddr)) {
                                        goto done;
                                }
                        } else {
                                /*
                                 * Scan all the bits in the ifa's address.
                                 * If a bit disagrees with what we are
                                 * looking for, mask it with the netmask
                                 * to see if it really matters.
                                 * (A byte at a time)
                                 */
                                if (ifa->ifa_netmask == 0)
                                        continue;
                                cp = addr_data;
                                cp2 = ifa->ifa_addr->sa_data;
                                cp3 = ifa->ifa_netmask->sa_data;
                                /* cplim: one past the last netmask byte. */
                                cplim = ifa->ifa_netmask->sa_len
                                        + (char *)ifa->ifa_netmask;
                                while (cp3 < cplim)
                                        if ((*cp++ ^ *cp2++) & *cp3++)
                                                goto next; /* next address! */
                                /*
                                 * If the netmask of what we just found
                                 * is more specific than what we had before
                                 * (if we had one), or if the virtual status
                                 * of new prefix is better than of the old one,
                                 * then remember the new one before continuing
                                 * to search for an even better one.
                                 */
                                if (ifa_maybe == NULL ||
                                    ifa_preferred(ifa_maybe, ifa) ||
                                    rn_refines((caddr_t)ifa->ifa_netmask,
                                    (caddr_t)ifa_maybe->ifa_netmask)) {
                                        ifa_maybe = ifa;
                                }
                        }
                }
        }
        /* No exact match: fall back to the best candidate, possibly NULL. */
        ifa = ifa_maybe;
        ifa_maybe = NULL;       /* not read again after this point */
done:
        return (ifa);
}
 2209 
 2210 /*
 2211  * Find an interface address specific to an interface best matching
 2212  * a given address.
 2213  */
 2214 struct ifaddr *
 2215 ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp)
 2216 {
 2217         struct ifaddr *ifa;
 2218         const char *cp, *cp2, *cp3;
 2219         char *cplim;
 2220         struct ifaddr *ifa_maybe = NULL;
 2221         u_int af = addr->sa_family;
 2222 
 2223         if (af >= AF_MAX)
 2224                 return (NULL);
 2225 
 2226         MPASS(in_epoch(net_epoch_preempt));
 2227         CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 2228                 if (ifa->ifa_addr->sa_family != af)
 2229                         continue;
 2230                 if (ifa_maybe == NULL)
 2231                         ifa_maybe = ifa;
 2232                 if (ifa->ifa_netmask == 0) {
 2233                         if (sa_equal(addr, ifa->ifa_addr) ||
 2234                             (ifa->ifa_dstaddr &&
 2235                             sa_equal(addr, ifa->ifa_dstaddr)))
 2236                                 goto done;
 2237                         continue;
 2238                 }
 2239                 if (ifp->if_flags & IFF_POINTOPOINT) {
 2240                         if (sa_equal(addr, ifa->ifa_dstaddr))
 2241                                 goto done;
 2242                 } else {
 2243                         cp = addr->sa_data;
 2244                         cp2 = ifa->ifa_addr->sa_data;
 2245                         cp3 = ifa->ifa_netmask->sa_data;
 2246                         cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
 2247                         for (; cp3 < cplim; cp3++)
 2248                                 if ((*cp++ ^ *cp2++) & *cp3)
 2249                                         break;
 2250                         if (cp3 == cplim)
 2251                                 goto done;
 2252                 }
 2253         }
 2254         ifa = ifa_maybe;
 2255 done:
 2256         return (ifa);
 2257 }
 2258 
 2259 /*
 2260  * See whether new ifa is better than current one:
 2261  * 1) A non-virtual one is preferred over virtual.
 2262  * 2) A virtual in master state preferred over any other state.
 2263  *
 2264  * Used in several address selecting functions.
 2265  */
 2266 int
 2267 ifa_preferred(struct ifaddr *cur, struct ifaddr *next)
 2268 {
 2269 
 2270         return (cur->ifa_carp && (!next->ifa_carp ||
 2271             ((*carp_master_p)(next) && !(*carp_master_p)(cur))));
 2272 }
 2273 
 2274 #include <net/if_llatbl.h>
 2275 
 2276 /*
 2277  * Default action when installing a route with a Link Level gateway.
 2278  * Lookup an appropriate real ifa to point to.
 2279  * This should be moved to /sys/net/link.c eventually.
 2280  */
 2281 static void
 2282 link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
 2283 {
 2284         struct ifaddr *ifa, *oifa;
 2285         struct sockaddr *dst;
 2286         struct ifnet *ifp;
 2287 
 2288         if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == NULL) ||
 2289             ((ifp = ifa->ifa_ifp) == NULL) || ((dst = rt_key(rt)) == NULL))
 2290                 return;
 2291         NET_EPOCH_ENTER();
 2292         ifa = ifaof_ifpforaddr(dst, ifp);
 2293         if (ifa) {
 2294                 oifa = rt->rt_ifa;
 2295                 if (oifa != ifa) {
 2296                         ifa_free(oifa);
 2297                         ifa_ref(ifa);
 2298                 }
 2299                 rt->rt_ifa = ifa;
 2300                 if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
 2301                         ifa->ifa_rtrequest(cmd, rt, info);
 2302         }
 2303         NET_EPOCH_EXIT();
 2304 }
 2305 
 2306 struct sockaddr_dl *
 2307 link_alloc_sdl(size_t size, int flags)
 2308 {
 2309 
 2310         return (malloc(size, M_TEMP, flags));
 2311 }
 2312 
/*
 * Release a sockaddr with free(9) against the M_TEMP malloc type, the type
 * link_alloc_sdl() allocates from.
 */
void
link_free_sdl(struct sockaddr *sa)
{
        free(sa, M_TEMP);
}
 2318 
 2319 /*
 2320  * Fills in given sdl with interface basic info.
 2321  * Returns pointer to filled sdl.
 2322  */
 2323 struct sockaddr_dl *
 2324 link_init_sdl(struct ifnet *ifp, struct sockaddr *paddr, u_char iftype)
 2325 {
 2326         struct sockaddr_dl *sdl;
 2327 
 2328         sdl = (struct sockaddr_dl *)paddr;
 2329         memset(sdl, 0, sizeof(struct sockaddr_dl));
 2330         sdl->sdl_len = sizeof(struct sockaddr_dl);
 2331         sdl->sdl_family = AF_LINK;
 2332         sdl->sdl_index = ifp->if_index;
 2333         sdl->sdl_type = iftype;
 2334 
 2335         return (sdl);
 2336 }
 2337 
 2338 /*
 2339  * Mark an interface down and notify protocols of
 2340  * the transition.
 2341  */
 2342 static void
 2343 if_unroute(struct ifnet *ifp, int flag, int fam)
 2344 {
 2345         struct ifaddr *ifa;
 2346 
 2347         KASSERT(flag == IFF_UP, ("if_unroute: flag != IFF_UP"));
 2348 
 2349         ifp->if_flags &= ~flag;
 2350         getmicrotime(&ifp->if_lastchange);
 2351         CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
 2352                 if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
 2353                         pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
 2354         ifp->if_qflush(ifp);
 2355 
 2356         if (ifp->if_carp)
 2357                 (*carp_linkstate_p)(ifp);
 2358         rt_ifmsg(ifp);
 2359 }
 2360 
 2361 /*
 2362  * Mark an interface up and notify protocols of
 2363  * the transition.
 2364  */
 2365 static void
 2366 if_route(struct ifnet *ifp, int flag, int fam)
 2367 {
 2368         struct ifaddr *ifa;
 2369 
 2370         KASSERT(flag == IFF_UP, ("if_route: flag != IFF_UP"));
 2371 
 2372         ifp->if_flags |= flag;
 2373         getmicrotime(&ifp->if_lastchange);
 2374         CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
 2375                 if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
 2376                         pfctlinput(PRC_IFUP, ifa->ifa_addr);
 2377         if (ifp->if_carp)
 2378                 (*carp_linkstate_p)(ifp);
 2379         rt_ifmsg(ifp);
 2380 #ifdef INET6
 2381         in6_if_up(ifp);
 2382 #endif
 2383 }
 2384 
/*
 * VLAN hook pointers.  They are only defined here, not assigned; presumably
 * if_vlan fills them in when it is present -- verify against if_vlan.
 * vlan_link_state_p is called by do_link_state_change() for interfaces
 * whose if_vlantrunk is set.
 */
void    (*vlan_link_state_p)(struct ifnet *);   /* XXX: private from if_vlan */
void    (*vlan_trunk_cap_p)(struct ifnet *);            /* XXX: private from if_vlan */
struct ifnet *(*vlan_trunkdev_p)(struct ifnet *);
struct  ifnet *(*vlan_devat_p)(struct ifnet *, uint16_t);
int     (*vlan_tag_p)(struct ifnet *, uint16_t *);
int     (*vlan_pcp_p)(struct ifnet *, uint16_t *);
int     (*vlan_setcookie_p)(struct ifnet *, void *);
void    *(*vlan_cookie_p)(struct ifnet *);
 2393 
 2394 /*
 2395  * Handle a change in the interface link state. To avoid LORs
 2396  * between driver lock and upper layer locks, as well as possible
 2397  * recursions, we post event to taskqueue, and all job
 2398  * is done in static do_link_state_change().
 2399  */
 2400 void
 2401 if_link_state_change(struct ifnet *ifp, int link_state)
 2402 {
 2403         /* Return if state hasn't changed. */
 2404         if (ifp->if_link_state == link_state)
 2405                 return;
 2406 
 2407         ifp->if_link_state = link_state;
 2408 
 2409         taskqueue_enqueue(taskqueue_swi, &ifp->if_linktask);
 2410 }
 2411 
 2412 static void
 2413 do_link_state_change(void *arg, int pending)
 2414 {
 2415         struct ifnet *ifp = (struct ifnet *)arg;
 2416         int link_state = ifp->if_link_state;
 2417         CURVNET_SET(ifp->if_vnet);
 2418 
 2419         /* Notify that the link state has changed. */
 2420         rt_ifmsg(ifp);
 2421         if (ifp->if_vlantrunk != NULL)
 2422                 (*vlan_link_state_p)(ifp);
 2423 
 2424         if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) &&
 2425             ifp->if_l2com != NULL)
 2426                 (*ng_ether_link_state_p)(ifp, link_state);
 2427         if (ifp->if_carp)
 2428                 (*carp_linkstate_p)(ifp);
 2429         if (ifp->if_bridge)
 2430                 ifp->if_bridge_linkstate(ifp);
 2431         if (ifp->if_lagg)
 2432                 (*lagg_linkstate_p)(ifp, link_state);
 2433 
 2434         if (IS_DEFAULT_VNET(curvnet))
 2435                 devctl_notify("IFNET", ifp->if_xname,
 2436                     (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN",
 2437                     NULL);
 2438         if (pending > 1)
 2439                 if_printf(ifp, "%d link states coalesced\n", pending);
 2440         if (log_link_state_change)
 2441                 if_printf(ifp, "link state changed to %s\n",
 2442                     (link_state == LINK_STATE_UP) ? "UP" : "DOWN" );
 2443         EVENTHANDLER_INVOKE(ifnet_link_event, ifp, link_state);
 2444         CURVNET_RESTORE();
 2445 }
 2446 
/*
 * Mark an interface down and notify protocols of
 * the transition.
 *
 * The ifnet_event handlers are invoked with IFNET_EVENT_DOWN first, while
 * IFF_UP is still set; if_unroute() then clears IFF_UP for all address
 * families (AF_UNSPEC).
 */
void
if_down(struct ifnet *ifp)
{

        EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_DOWN);
        if_unroute(ifp, IFF_UP, AF_UNSPEC);
}
 2458 
/*
 * Mark an interface up and notify protocols of
 * the transition.
 *
 * if_route() sets IFF_UP for all address families (AF_UNSPEC) first; the
 * ifnet_event handlers are invoked with IFNET_EVENT_UP afterwards, so they
 * see the interface already marked up.
 */
void
if_up(struct ifnet *ifp)
{

        if_route(ifp, IFF_UP, AF_UNSPEC);
        EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_UP);
}
 2470 
 2471 /*
 2472  * Flush an interface queue.
 2473  */
 2474 void
 2475 if_qflush(struct ifnet *ifp)
 2476 {
 2477         struct mbuf *m, *n;
 2478         struct ifaltq *ifq;
 2479         
 2480         ifq = &ifp->if_snd;
 2481         IFQ_LOCK(ifq);
 2482 #ifdef ALTQ
 2483         if (ALTQ_IS_ENABLED(ifq))
 2484                 ALTQ_PURGE(ifq);
 2485 #endif
 2486         n = ifq->ifq_head;
 2487         while ((m = n) != NULL) {
 2488                 n = m->m_nextpkt;
 2489                 m_freem(m);
 2490         }
 2491         ifq->ifq_head = 0;
 2492         ifq->ifq_tail = 0;
 2493         ifq->ifq_len = 0;
 2494         IFQ_UNLOCK(ifq);
 2495 }
 2496 
 2497 /*
 2498  * Map interface name to interface structure pointer, with or without
 2499  * returning a reference.
 2500  */
 2501 struct ifnet *
 2502 ifunit_ref(const char *name)
 2503 {
 2504         struct ifnet *ifp;
 2505 
 2506         IFNET_RLOCK_NOSLEEP();
 2507         CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 2508                 if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0 &&
 2509                     !(ifp->if_flags & IFF_DYING))
 2510                         break;
 2511         }
 2512         if (ifp != NULL)
 2513                 if_ref(ifp);
 2514         IFNET_RUNLOCK_NOSLEEP();
 2515         return (ifp);
 2516 }
 2517 
 2518 struct ifnet *
 2519 ifunit(const char *name)
 2520 {
 2521         struct ifnet *ifp;
 2522 
 2523         IFNET_RLOCK_NOSLEEP();
 2524         CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 2525                 if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0)
 2526                         break;
 2527         }
 2528         IFNET_RUNLOCK_NOSLEEP();
 2529         return (ifp);
 2530 }
 2531 
 2532 void *
 2533 ifr_buffer_get_buffer(void *data)
 2534 {
 2535         union ifreq_union *ifrup;
 2536 
 2537         ifrup = data;
 2538 #ifdef COMPAT_FREEBSD32
 2539         if (SV_CURPROC_FLAG(SV_ILP32))
 2540                 return ((void *)(uintptr_t)
 2541                     ifrup->ifr32.ifr_ifru.ifru_buffer.buffer);
 2542 #endif
 2543         return (ifrup->ifr.ifr_ifru.ifru_buffer.buffer);
 2544 }
 2545 
 2546 static void
 2547 ifr_buffer_set_buffer_null(void *data)
 2548 {
 2549         union ifreq_union *ifrup;
 2550 
 2551         ifrup = data;
 2552 #ifdef COMPAT_FREEBSD32
 2553         if (SV_CURPROC_FLAG(SV_ILP32))
 2554                 ifrup->ifr32.ifr_ifru.ifru_buffer.buffer = 0;
 2555         else
 2556 #endif
 2557                 ifrup->ifr.ifr_ifru.ifru_buffer.buffer = NULL;
 2558 }
 2559 
 2560 size_t
 2561 ifr_buffer_get_length(void *data)
 2562 {
 2563         union ifreq_union *ifrup;
 2564 
 2565         ifrup = data;
 2566 #ifdef COMPAT_FREEBSD32
 2567         if (SV_CURPROC_FLAG(SV_ILP32))
 2568                 return (ifrup->ifr32.ifr_ifru.ifru_buffer.length);
 2569 #endif
 2570         return (ifrup->ifr.ifr_ifru.ifru_buffer.length);
 2571 }
 2572 
 2573 static void
 2574 ifr_buffer_set_length(void *data, size_t len)
 2575 {
 2576         union ifreq_union *ifrup;
 2577 
 2578         ifrup = data;
 2579 #ifdef COMPAT_FREEBSD32
 2580         if (SV_CURPROC_FLAG(SV_ILP32))
 2581                 ifrup->ifr32.ifr_ifru.ifru_buffer.length = len;
 2582         else
 2583 #endif
 2584                 ifrup->ifr.ifr_ifru.ifru_buffer.length = len;
 2585 }
 2586 
 2587 void *
 2588 ifr_data_get_ptr(void *ifrp)
 2589 {
 2590         union ifreq_union *ifrup;
 2591 
 2592         ifrup = ifrp;
 2593 #ifdef COMPAT_FREEBSD32
 2594         if (SV_CURPROC_FLAG(SV_ILP32))
 2595                 return ((void *)(uintptr_t)
 2596                     ifrup->ifr32.ifr_ifru.ifru_data);
 2597 #endif
 2598                 return (ifrup->ifr.ifr_ifru.ifru_data);
 2599 }
 2600 
 2601 /*
 2602  * Hardware specific interface ioctls.
 2603  */
 2604 int
 2605 ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
 2606 {
 2607         struct ifreq *ifr;
 2608         int error = 0, do_ifup = 0;
 2609         int new_flags, temp_flags;
 2610         size_t namelen, onamelen;
 2611         size_t descrlen;
 2612         char *descrbuf, *odescrbuf;
 2613         char new_name[IFNAMSIZ];
 2614         struct ifaddr *ifa;
 2615         struct sockaddr_dl *sdl;
 2616 
 2617         ifr = (struct ifreq *)data;
 2618         switch (cmd) {
 2619         case SIOCGIFINDEX:
 2620                 ifr->ifr_index = ifp->if_index;
 2621                 break;
 2622 
 2623         case SIOCGIFFLAGS:
 2624                 temp_flags = ifp->if_flags | ifp->if_drv_flags;
 2625                 ifr->ifr_flags = temp_flags & 0xffff;
 2626                 ifr->ifr_flagshigh = temp_flags >> 16;
 2627                 break;
 2628 
 2629         case SIOCGIFCAP:
 2630                 ifr->ifr_reqcap = ifp->if_capabilities;
 2631                 ifr->ifr_curcap = ifp->if_capenable;
 2632                 break;
 2633 
 2634         case SIOCGIFDATA:
 2635         {
 2636                 struct if_data ifd;
 2637 
 2638                 /* Ensure uninitialised padding is not leaked. */
 2639                 memset(&ifd, 0, sizeof(ifd));
 2640 
 2641                 if_data_copy(ifp, &ifd);
 2642                 error = copyout(&ifd, ifr_data_get_ptr(ifr), sizeof(ifd));
 2643                 break;
 2644         }
 2645 
 2646 #ifdef MAC
 2647         case SIOCGIFMAC:
 2648                 error = mac_ifnet_ioctl_get(td->td_ucred, ifr, ifp);
 2649                 break;
 2650 #endif
 2651 
 2652         case SIOCGIFMETRIC:
 2653                 ifr->ifr_metric = ifp->if_metric;
 2654                 break;
 2655 
 2656         case SIOCGIFMTU:
 2657                 ifr->ifr_mtu = ifp->if_mtu;
 2658                 break;
 2659 
 2660         case SIOCGIFPHYS:
 2661                 /* XXXGL: did this ever worked? */
 2662                 ifr->ifr_phys = 0;
 2663                 break;
 2664 
 2665         case SIOCGIFDESCR:
 2666                 error = 0;
 2667                 sx_slock(&ifdescr_sx);
 2668                 if (ifp->if_description == NULL)
 2669                         error = ENOMSG;
 2670                 else {
 2671                         /* space for terminating nul */
 2672                         descrlen = strlen(ifp->if_description) + 1;
 2673                         if (ifr_buffer_get_length(ifr) < descrlen)
 2674                                 ifr_buffer_set_buffer_null(ifr);
 2675                         else
 2676                                 error = copyout(ifp->if_description,
 2677                                     ifr_buffer_get_buffer(ifr), descrlen);
 2678                         ifr_buffer_set_length(ifr, descrlen);
 2679                 }
 2680                 sx_sunlock(&ifdescr_sx);
 2681                 break;
 2682 
 2683         case SIOCSIFDESCR:
 2684                 error = priv_check(td, PRIV_NET_SETIFDESCR);
 2685                 if (error)
 2686                         return (error);
 2687 
 2688                 /*
 2689                  * Copy only (length-1) bytes to make sure that
 2690                  * if_description is always nul terminated.  The
 2691                  * length parameter is supposed to count the
 2692                  * terminating nul in.
 2693                  */
 2694                 if (ifr_buffer_get_length(ifr) > ifdescr_maxlen)
 2695                         return (ENAMETOOLONG);
 2696                 else if (ifr_buffer_get_length(ifr) == 0)
 2697                         descrbuf = NULL;
 2698                 else {
 2699                         descrbuf = malloc(ifr_buffer_get_length(ifr),
 2700                             M_IFDESCR, M_WAITOK | M_ZERO);
 2701                         error = copyin(ifr_buffer_get_buffer(ifr), descrbuf,
 2702                             ifr_buffer_get_length(ifr) - 1);
 2703                         if (error) {
 2704                                 free(descrbuf, M_IFDESCR);
 2705                                 break;
 2706                         }
 2707                 }
 2708 
 2709                 sx_xlock(&ifdescr_sx);
 2710                 odescrbuf = ifp->if_description;
 2711                 ifp->if_description = descrbuf;
 2712                 sx_xunlock(&ifdescr_sx);
 2713 
 2714                 getmicrotime(&ifp->if_lastchange);
 2715                 free(odescrbuf, M_IFDESCR);
 2716                 break;
 2717 
 2718         case SIOCGIFFIB:
 2719                 ifr->ifr_fib = ifp->if_fib;
 2720                 break;
 2721 
 2722         case SIOCSIFFIB:
 2723                 error = priv_check(td, PRIV_NET_SETIFFIB);
 2724                 if (error)
 2725                         return (error);
 2726                 if (ifr->ifr_fib >= rt_numfibs)
 2727                         return (EINVAL);
 2728 
 2729                 ifp->if_fib = ifr->ifr_fib;
 2730                 break;
 2731 
 2732         case SIOCSIFFLAGS:
 2733                 error = priv_check(td, PRIV_NET_SETIFFLAGS);
 2734                 if (error)
 2735                         return (error);
 2736                 /*
 2737                  * Currently, no driver owned flags pass the IFF_CANTCHANGE
 2738                  * check, so we don't need special handling here yet.
 2739                  */
 2740                 new_flags = (ifr->ifr_flags & 0xffff) |
 2741                     (ifr->ifr_flagshigh << 16);
 2742                 if (ifp->if_flags & IFF_UP &&
 2743                     (new_flags & IFF_UP) == 0) {
 2744                         if_down(ifp);
 2745                 } else if (new_flags & IFF_UP &&
 2746                     (ifp->if_flags & IFF_UP) == 0) {
 2747                         do_ifup = 1;
 2748                 }
 2749                 /* See if permanently promiscuous mode bit is about to flip */
 2750                 if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) {
 2751                         if (new_flags & IFF_PPROMISC)
 2752                                 ifp->if_flags |= IFF_PROMISC;
 2753                         else if (ifp->if_pcount == 0)
 2754                                 ifp->if_flags &= ~IFF_PROMISC;
 2755                         if (log_promisc_mode_change)
 2756                                 if_printf(ifp, "permanently promiscuous mode %s\n",
 2757                                     ((new_flags & IFF_PPROMISC) ?
 2758                                      "enabled" : "disabled"));
 2759                 }
 2760                 ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
 2761                         (new_flags &~ IFF_CANTCHANGE);
 2762                 if (ifp->if_ioctl) {
 2763                         (void) (*ifp->if_ioctl)(ifp, cmd, data);
 2764                 }
 2765                 if (do_ifup)
 2766                         if_up(ifp);
 2767                 getmicrotime(&ifp->if_lastchange);
 2768                 break;
 2769 
 2770         case SIOCSIFCAP:
 2771                 error = priv_check(td, PRIV_NET_SETIFCAP);
 2772                 if (error)
 2773                         return (error);
 2774                 if (ifp->if_ioctl == NULL)
 2775                         return (EOPNOTSUPP);
 2776                 if (ifr->ifr_reqcap & ~ifp->if_capabilities)
 2777                         return (EINVAL);
 2778                 error = (*ifp->if_ioctl)(ifp, cmd, data);
 2779                 if (error == 0)
 2780                         getmicrotime(&ifp->if_lastchange);
 2781                 break;
 2782 
 2783 #ifdef MAC
 2784         case SIOCSIFMAC:
 2785                 error = mac_ifnet_ioctl_set(td->td_ucred, ifr, ifp);
 2786                 break;
 2787 #endif
 2788 
 2789         case SIOCSIFNAME:
 2790                 error = priv_check(td, PRIV_NET_SETIFNAME);
 2791                 if (error)
 2792                         return (error);
 2793                 error = copyinstr(ifr_data_get_ptr(ifr), new_name, IFNAMSIZ,
 2794                     NULL);
 2795                 if (error != 0)
 2796                         return (error);
 2797                 if (new_name[0] == '\0')
 2798                         return (EINVAL);
 2799                 if (new_name[IFNAMSIZ-1] != '\0') {
 2800                         new_name[IFNAMSIZ-1] = '\0';
 2801                         if (strlen(new_name) == IFNAMSIZ-1)
 2802                                 return (EINVAL);
 2803                 }
 2804                 if (strcmp(new_name, ifp->if_xname) == 0)
 2805                         break;
 2806                 if (ifunit(new_name) != NULL)
 2807                         return (EEXIST);
 2808 
 2809                 /*
 2810                  * XXX: Locking.  Nothing else seems to lock if_flags,
 2811                  * and there are numerous other races with the
 2812                  * ifunit() checks not being atomic with namespace
 2813                  * changes (renames, vmoves, if_attach, etc).
 2814                  */
 2815                 ifp->if_flags |= IFF_RENAMING;
 2816                 
 2817                 /* Announce the departure of the interface. */
 2818                 rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
 2819                 EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
 2820 
 2821                 if_printf(ifp, "changing name to '%s'\n", new_name);
 2822 
 2823                 IF_ADDR_WLOCK(ifp);
 2824                 strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
 2825                 ifa = ifp->if_addr;
 2826                 sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 2827                 namelen = strlen(new_name);
 2828                 onamelen = sdl->sdl_nlen;
 2829                 /*
 2830                  * Move the address if needed.  This is safe because we
 2831                  * allocate space for a name of length IFNAMSIZ when we
 2832                  * create this in if_attach().
 2833                  */
 2834                 if (namelen != onamelen) {
 2835                         bcopy(sdl->sdl_data + onamelen,
 2836                             sdl->sdl_data + namelen, sdl->sdl_alen);
 2837                 }
 2838                 bcopy(new_name, sdl->sdl_data, namelen);
 2839                 sdl->sdl_nlen = namelen;
 2840                 sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
 2841                 bzero(sdl->sdl_data, onamelen);
 2842                 while (namelen != 0)
 2843                         sdl->sdl_data[--namelen] = 0xff;
 2844                 IF_ADDR_WUNLOCK(ifp);
 2845 
 2846                 EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
 2847                 /* Announce the return of the interface. */
 2848                 rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
 2849 
 2850                 ifp->if_flags &= ~IFF_RENAMING;
 2851                 break;
 2852 
 2853 #ifdef VIMAGE
 2854         case SIOCSIFVNET:
 2855                 error = priv_check(td, PRIV_NET_SETIFVNET);
 2856                 if (error)
 2857                         return (error);
 2858                 error = if_vmove_loan(td, ifp, ifr->ifr_name, ifr->ifr_jid);
 2859                 break;
 2860 #endif
 2861 
 2862         case SIOCSIFMETRIC:
 2863                 error = priv_check(td, PRIV_NET_SETIFMETRIC);
 2864                 if (error)
 2865                         return (error);
 2866                 ifp->if_metric = ifr->ifr_metric;
 2867                 getmicrotime(&ifp->if_lastchange);
 2868                 break;
 2869 
 2870         case SIOCSIFPHYS:
 2871                 error = priv_check(td, PRIV_NET_SETIFPHYS);
 2872                 if (error)
 2873                         return (error);
 2874                 if (ifp->if_ioctl == NULL)
 2875                         return (EOPNOTSUPP);
 2876                 error = (*ifp->if_ioctl)(ifp, cmd, data);
 2877                 if (error == 0)
 2878                         getmicrotime(&ifp->if_lastchange);
 2879                 break;
 2880 
 2881         case SIOCSIFMTU:
 2882         {
 2883                 u_long oldmtu = ifp->if_mtu;
 2884 
 2885                 error = priv_check(td, PRIV_NET_SETIFMTU);
 2886                 if (error)
 2887                         return (error);
 2888                 if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
 2889                         return (EINVAL);
 2890                 if (ifp->if_ioctl == NULL)
 2891                         return (EOPNOTSUPP);
 2892                 error = (*ifp->if_ioctl)(ifp, cmd, data);
 2893                 if (error == 0) {
 2894                         getmicrotime(&ifp->if_lastchange);
 2895                         rt_ifmsg(ifp);
 2896 #ifdef INET
 2897                         NETDUMP_REINIT(ifp);
 2898 #endif
 2899                 }
 2900                 /*
 2901                  * If the link MTU changed, do network layer specific procedure.
 2902                  */
 2903                 if (ifp->if_mtu != oldmtu) {
 2904 #ifdef INET6
 2905                         nd6_setmtu(ifp);
 2906 #endif
 2907                         rt_updatemtu(ifp);
 2908                 }
 2909                 break;
 2910         }
 2911 
 2912         case SIOCADDMULTI:
 2913         case SIOCDELMULTI:
 2914                 if (cmd == SIOCADDMULTI)
 2915                         error = priv_check(td, PRIV_NET_ADDMULTI);
 2916                 else
 2917                         error = priv_check(td, PRIV_NET_DELMULTI);
 2918                 if (error)
 2919                         return (error);
 2920 
 2921                 /* Don't allow group membership on non-multicast interfaces. */
 2922                 if ((ifp->if_flags & IFF_MULTICAST) == 0)
 2923                         return (EOPNOTSUPP);
 2924 
 2925                 /* Don't let users screw up protocols' entries. */
 2926                 if (ifr->ifr_addr.sa_family != AF_LINK)
 2927                         return (EINVAL);
 2928 
 2929                 if (cmd == SIOCADDMULTI) {
 2930                         struct ifmultiaddr *ifma;
 2931 
 2932                         /*
 2933                          * Userland is only permitted to join groups once
 2934                          * via the if_addmulti() KPI, because it cannot hold
 2935                          * struct ifmultiaddr * between calls. It may also
 2936                          * lose a race while we check if the membership
 2937                          * already exists.
 2938                          */
 2939                         IF_ADDR_RLOCK(ifp);
 2940                         ifma = if_findmulti(ifp, &ifr->ifr_addr);
 2941                         IF_ADDR_RUNLOCK(ifp);
 2942                         if (ifma != NULL)
 2943                                 error = EADDRINUSE;
 2944                         else
 2945                                 error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
 2946                 } else {
 2947                         error = if_delmulti(ifp, &ifr->ifr_addr);
 2948                 }
 2949                 if (error == 0)
 2950                         getmicrotime(&ifp->if_lastchange);
 2951                 break;
 2952 
 2953         case SIOCSIFPHYADDR:
 2954         case SIOCDIFPHYADDR:
 2955 #ifdef INET6
 2956         case SIOCSIFPHYADDR_IN6:
 2957 #endif
 2958         case SIOCSIFMEDIA:
 2959         case SIOCSIFGENERIC:
 2960                 error = priv_check(td, PRIV_NET_HWIOCTL);
 2961                 if (error)
 2962                         return (error);
 2963                 if (ifp->if_ioctl == NULL)
 2964                         return (EOPNOTSUPP);
 2965                 error = (*ifp->if_ioctl)(ifp, cmd, data);
 2966                 if (error == 0)
 2967                         getmicrotime(&ifp->if_lastchange);
 2968                 break;
 2969 
 2970         case SIOCGIFSTATUS:
 2971         case SIOCGIFPSRCADDR:
 2972         case SIOCGIFPDSTADDR:
 2973         case SIOCGIFMEDIA:
 2974         case SIOCGIFXMEDIA:
 2975         case SIOCGIFGENERIC:
 2976         case SIOCGIFRSSKEY:
 2977         case SIOCGIFRSSHASH:
 2978         case SIOCGIFDOWNREASON:
 2979                 if (ifp->if_ioctl == NULL)
 2980                         return (EOPNOTSUPP);
 2981                 error = (*ifp->if_ioctl)(ifp, cmd, data);
 2982                 break;
 2983 
 2984         case SIOCSIFLLADDR:
 2985                 error = priv_check(td, PRIV_NET_SETLLADDR);
 2986                 if (error)
 2987                         return (error);
 2988                 error = if_setlladdr(ifp,
 2989                     ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
 2990                 break;
 2991 
 2992         case SIOCGHWADDR:
 2993                 error = if_gethwaddr(ifp, ifr);
 2994                 break;
 2995 
 2996         case CASE_IOC_IFGROUPREQ(SIOCAIFGROUP):
 2997                 error = priv_check(td, PRIV_NET_ADDIFGROUP);
 2998                 if (error)
 2999                         return (error);
 3000                 if ((error = if_addgroup(ifp,
 3001                     ifgr_group_get((struct ifgroupreq *)data))))
 3002                         return (error);
 3003                 break;
 3004 
 3005         case CASE_IOC_IFGROUPREQ(SIOCGIFGROUP):
 3006                 if ((error = if_getgroup((struct ifgroupreq *)data, ifp)))
 3007                         return (error);
 3008                 break;
 3009 
 3010         case CASE_IOC_IFGROUPREQ(SIOCDIFGROUP):
 3011                 error = priv_check(td, PRIV_NET_DELIFGROUP);
 3012                 if (error)
 3013                         return (error);
 3014                 if ((error = if_delgroup(ifp,
 3015                     ifgr_group_get((struct ifgroupreq *)data))))
 3016                         return (error);
 3017                 break;
 3018 
 3019         default:
 3020                 error = ENOIOCTL;
 3021                 break;
 3022         }
 3023         return (error);
 3024 }
 3025 
 3026 #ifdef COMPAT_FREEBSD32
 3027 struct ifconf32 {
 3028         int32_t ifc_len;
 3029         union {
 3030                 uint32_t        ifcu_buf;
 3031                 uint32_t        ifcu_req;
 3032         } ifc_ifcu;
 3033 };
 3034 #define SIOCGIFCONF32   _IOWR('i', 36, struct ifconf32)
 3035 #endif
 3036 
 3037 #ifdef COMPAT_FREEBSD32
 3038 static void
 3039 ifmr_init(struct ifmediareq *ifmr, caddr_t data)
 3040 {
 3041         struct ifmediareq32 *ifmr32;
 3042 
 3043         ifmr32 = (struct ifmediareq32 *)data;
 3044         memcpy(ifmr->ifm_name, ifmr32->ifm_name,
 3045             sizeof(ifmr->ifm_name));
 3046         ifmr->ifm_current = ifmr32->ifm_current;
 3047         ifmr->ifm_mask = ifmr32->ifm_mask;
 3048         ifmr->ifm_status = ifmr32->ifm_status;
 3049         ifmr->ifm_active = ifmr32->ifm_active;
 3050         ifmr->ifm_count = ifmr32->ifm_count;
 3051         ifmr->ifm_ulist = (int *)(uintptr_t)ifmr32->ifm_ulist;
 3052 }
 3053 
 3054 static void
 3055 ifmr_update(const struct ifmediareq *ifmr, caddr_t data)
 3056 {
 3057         struct ifmediareq32 *ifmr32;
 3058 
 3059         ifmr32 = (struct ifmediareq32 *)data;
 3060         ifmr32->ifm_current = ifmr->ifm_current;
 3061         ifmr32->ifm_mask = ifmr->ifm_mask;
 3062         ifmr32->ifm_status = ifmr->ifm_status;
 3063         ifmr32->ifm_active = ifmr->ifm_active;
 3064         ifmr32->ifm_count = ifmr->ifm_count;
 3065 }
 3066 #endif
 3067 
 3068 /*
 3069  * Interface ioctls.
 3070  */
 3071 int
 3072 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
 3073 {
 3074 #ifdef COMPAT_FREEBSD32
 3075         caddr_t saved_data = NULL;
 3076         struct ifmediareq ifmr;
 3077         struct ifmediareq *ifmrp;
 3078 #endif
 3079         struct ifnet *ifp;
 3080         struct ifreq *ifr;
 3081         int error;
 3082         int oif_flags;
 3083 #ifdef VIMAGE
 3084         int shutdown;
 3085 #endif
 3086 
 3087         CURVNET_SET(so->so_vnet);
 3088 #ifdef VIMAGE
 3089         /* Make sure the VNET is stable. */
 3090         shutdown = (so->so_vnet->vnet_state > SI_SUB_VNET &&
 3091                  so->so_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
 3092         if (shutdown) {
 3093                 CURVNET_RESTORE();
 3094                 return (EBUSY);
 3095         }
 3096 #endif
 3097 
 3098 
 3099         switch (cmd) {
 3100         case SIOCGIFCONF:
 3101                 error = ifconf(cmd, data);
 3102                 CURVNET_RESTORE();
 3103                 return (error);
 3104 
 3105 #ifdef COMPAT_FREEBSD32
 3106         case SIOCGIFCONF32:
 3107                 {
 3108                         struct ifconf32 *ifc32;
 3109                         struct ifconf ifc;
 3110 
 3111                         ifc32 = (struct ifconf32 *)data;
 3112                         ifc.ifc_len = ifc32->ifc_len;
 3113                         ifc.ifc_buf = PTRIN(ifc32->ifc_buf);
 3114 
 3115                         error = ifconf(SIOCGIFCONF, (void *)&ifc);
 3116                         CURVNET_RESTORE();
 3117                         if (error == 0)
 3118                                 ifc32->ifc_len = ifc.ifc_len;
 3119                         return (error);
 3120                 }
 3121 #endif
 3122         }
 3123 
 3124 #ifdef COMPAT_FREEBSD32
 3125         ifmrp = NULL;
 3126         switch (cmd) {
 3127         case SIOCGIFMEDIA32:
 3128         case SIOCGIFXMEDIA32:
 3129                 ifmrp = &ifmr;
 3130                 ifmr_init(ifmrp, data);
 3131                 cmd = _IOC_NEWTYPE(cmd, struct ifmediareq);
 3132                 saved_data = data;
 3133                 data = (caddr_t)ifmrp;
 3134         }
 3135 #endif
 3136 
 3137         ifr = (struct ifreq *)data;
 3138         switch (cmd) {
 3139 #ifdef VIMAGE
 3140         case SIOCSIFRVNET:
 3141                 error = priv_check(td, PRIV_NET_SETIFVNET);
 3142                 if (error == 0)
 3143                         error = if_vmove_reclaim(td, ifr->ifr_name,
 3144                             ifr->ifr_jid);
 3145                 goto out_noref;
 3146 #endif
 3147         case SIOCIFCREATE:
 3148         case SIOCIFCREATE2:
 3149                 error = priv_check(td, PRIV_NET_IFCREATE);
 3150                 if (error == 0)
 3151                         error = if_clone_create(ifr->ifr_name,
 3152                             sizeof(ifr->ifr_name), cmd == SIOCIFCREATE2 ?
 3153                             ifr_data_get_ptr(ifr) : NULL);
 3154                 goto out_noref;
 3155         case SIOCIFDESTROY:
 3156                 error = priv_check(td, PRIV_NET_IFDESTROY);
 3157 
 3158                 if (error == 0) {
 3159                         sx_xlock(&ifnet_detach_sxlock);
 3160                         error = if_clone_destroy(ifr->ifr_name);
 3161                         sx_xunlock(&ifnet_detach_sxlock);
 3162                 }
 3163                 goto out_noref;
 3164 
 3165         case SIOCIFGCLONERS:
 3166                 error = if_clone_list((struct if_clonereq *)data);
 3167                 goto out_noref;
 3168 
 3169         case CASE_IOC_IFGROUPREQ(SIOCGIFGMEMB):
 3170                 error = if_getgroupmembers((struct ifgroupreq *)data);
 3171                 goto out_noref;
 3172 
 3173 #if defined(INET) || defined(INET6)
 3174         case SIOCSVH:
 3175         case SIOCGVH:
 3176                 if (carp_ioctl_p == NULL)
 3177                         error = EPROTONOSUPPORT;
 3178                 else
 3179                         error = (*carp_ioctl_p)(ifr, cmd, td);
 3180                 goto out_noref;
 3181 #endif
 3182         }
 3183 
 3184         ifp = ifunit_ref(ifr->ifr_name);
 3185         if (ifp == NULL) {
 3186                 error = ENXIO;
 3187                 goto out_noref;
 3188         }
 3189 
 3190         error = ifhwioctl(cmd, ifp, data, td);
 3191         if (error != ENOIOCTL)
 3192                 goto out_ref;
 3193 
 3194         oif_flags = ifp->if_flags;
 3195         if (so->so_proto == NULL) {
 3196                 error = EOPNOTSUPP;
 3197                 goto out_ref;
 3198         }
 3199 
 3200         /*
 3201          * Pass the request on to the socket control method, and if the
 3202          * latter returns EOPNOTSUPP, directly to the interface.
 3203          *
 3204          * Make an exception for the legacy SIOCSIF* requests.  Drivers
 3205          * trust SIOCSIFADDR et al to come from an already privileged
 3206          * layer, and do not perform any credentials checks or input
 3207          * validation.
 3208          */
 3209         error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd, data,
 3210             ifp, td));
 3211         if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL &&
 3212             cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR &&
 3213             cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK)
 3214                 error = (*ifp->if_ioctl)(ifp, cmd, data);
 3215 
 3216         if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
 3217 #ifdef INET6
 3218                 if (ifp->if_flags & IFF_UP)
 3219                         in6_if_up(ifp);
 3220 #endif
 3221         }
 3222 
 3223 out_ref:
 3224         if_rele(ifp);
 3225 out_noref:
 3226 #ifdef COMPAT_FREEBSD32
 3227         if (ifmrp != NULL) {
 3228                 KASSERT((cmd == SIOCGIFMEDIA || cmd == SIOCGIFXMEDIA),
 3229                     ("ifmrp non-NULL, but cmd is not an ifmedia req 0x%lx",
 3230                      cmd));
 3231                 data = saved_data;
 3232                 ifmr_update(ifmrp, data);
 3233         }
 3234 #endif
 3235         CURVNET_RESTORE();
 3236         return (error);
 3237 }
 3238 
 3239 /*
 3240  * The code common to handling reference counted flags,
 3241  * e.g., in ifpromisc() and if_allmulti().
 3242  * The "pflag" argument can specify a permanent mode flag to check,
 3243  * such as IFF_PPROMISC for promiscuous mode; should be 0 if none.
 3244  *
 3245  * Only to be used on stack-owned flags, not driver-owned flags.
 3246  */
 3247 static int
 3248 if_setflag(struct ifnet *ifp, int flag, int pflag, int *refcount, int onswitch)
 3249 {
 3250         struct ifreq ifr;
 3251         int error;
 3252         int oldflags, oldcount;
 3253 
 3254         /* Sanity checks to catch programming errors */
 3255         KASSERT((flag & (IFF_DRV_OACTIVE|IFF_DRV_RUNNING)) == 0,
 3256             ("%s: setting driver-owned flag %d", __func__, flag));
 3257 
 3258         if (onswitch)
 3259                 KASSERT(*refcount >= 0,
 3260                     ("%s: increment negative refcount %d for flag %d",
 3261                     __func__, *refcount, flag));
 3262         else
 3263                 KASSERT(*refcount > 0,
 3264                     ("%s: decrement non-positive refcount %d for flag %d",
 3265                     __func__, *refcount, flag));
 3266 
 3267         /* In case this mode is permanent, just touch refcount */
 3268         if (ifp->if_flags & pflag) {
 3269                 *refcount += onswitch ? 1 : -1;
 3270                 return (0);
 3271         }
 3272 
 3273         /* Save ifnet parameters for if_ioctl() may fail */
 3274         oldcount = *refcount;
 3275         oldflags = ifp->if_flags;
 3276         
 3277         /*
 3278          * See if we aren't the only and touching refcount is enough.
 3279          * Actually toggle interface flag if we are the first or last.
 3280          */
 3281         if (onswitch) {
 3282                 if ((*refcount)++)
 3283                         return (0);
 3284                 ifp->if_flags |= flag;
 3285         } else {
 3286                 if (--(*refcount))
 3287                         return (0);
 3288                 ifp->if_flags &= ~flag;
 3289         }
 3290 
 3291         /* Call down the driver since we've changed interface flags */
 3292         if (ifp->if_ioctl == NULL) {
 3293                 error = EOPNOTSUPP;
 3294                 goto recover;
 3295         }
 3296         ifr.ifr_flags = ifp->if_flags & 0xffff;
 3297         ifr.ifr_flagshigh = ifp->if_flags >> 16;
 3298         error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
 3299         if (error)
 3300                 goto recover;
 3301         /* Notify userland that interface flags have changed */
 3302         rt_ifmsg(ifp);
 3303         return (0);
 3304 
 3305 recover:
 3306         /* Recover after driver error */
 3307         *refcount = oldcount;
 3308         ifp->if_flags = oldflags;
 3309         return (error);
 3310 }
 3311 
 3312 /*
 3313  * Set/clear promiscuous mode on interface ifp based on the truth value
 3314  * of pswitch.  The calls are reference counted so that only the first
 3315  * "on" request actually has an effect, as does the final "off" request.
 3316  * Results are undefined if the "off" and "on" requests are not matched.
 3317  */
 3318 int
 3319 ifpromisc(struct ifnet *ifp, int pswitch)
 3320 {
 3321         int error;
 3322         int oldflags = ifp->if_flags;
 3323 
 3324         error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC,
 3325                            &ifp->if_pcount, pswitch);
 3326         /* If promiscuous mode status has changed, log a message */
 3327         if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC) &&
 3328             log_promisc_mode_change)
 3329                 if_printf(ifp, "promiscuous mode %s\n",
 3330                     (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
 3331         return (error);
 3332 }
 3333 
 3334 /*
 3335  * Return interface configuration
 3336  * of system.  List may be used
 3337  * in later ioctl's (above) to get
 3338  * other information.
 3339  */
 3340 /*ARGSUSED*/
 3341 static int
 3342 ifconf(u_long cmd, caddr_t data)
 3343 {
 3344         struct ifconf *ifc = (struct ifconf *)data;
 3345         struct ifnet *ifp;
 3346         struct ifaddr *ifa;
 3347         struct ifreq ifr;
 3348         struct sbuf *sb;
 3349         int error, full = 0, valid_len, max_len;
 3350 
 3351         /* Limit initial buffer size to MAXPHYS to avoid DoS from userspace. */
 3352         max_len = MAXPHYS - 1;
 3353 
 3354         /* Prevent hostile input from being able to crash the system */
 3355         if (ifc->ifc_len <= 0)
 3356                 return (EINVAL);
 3357 
 3358 again:
 3359         if (ifc->ifc_len <= max_len) {
 3360                 max_len = ifc->ifc_len;
 3361                 full = 1;
 3362         }
 3363         sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN);
 3364         max_len = 0;
 3365         valid_len = 0;
 3366 
 3367         IFNET_RLOCK();
 3368         CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 3369                 int addrs;
 3370 
 3371                 /*
 3372                  * Zero the ifr to make sure we don't disclose the contents
 3373                  * of the stack.
 3374                  */
 3375                 memset(&ifr, 0, sizeof(ifr));
 3376 
 3377                 if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
 3378                     >= sizeof(ifr.ifr_name)) {
 3379                         sbuf_delete(sb);
 3380                         IFNET_RUNLOCK();
 3381                         return (ENAMETOOLONG);
 3382                 }
 3383 
 3384                 addrs = 0;
 3385                 IF_ADDR_RLOCK(ifp);
 3386                 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 3387                         struct sockaddr *sa = ifa->ifa_addr;
 3388 
 3389                         if (prison_if(curthread->td_ucred, sa) != 0)
 3390                                 continue;
 3391                         addrs++;
 3392                         if (sa->sa_len <= sizeof(*sa)) {
 3393                                 if (sa->sa_len < sizeof(*sa)) {
 3394                                         memset(&ifr.ifr_ifru.ifru_addr, 0,
 3395                                             sizeof(ifr.ifr_ifru.ifru_addr));
 3396                                         memcpy(&ifr.ifr_ifru.ifru_addr, sa,
 3397                                             sa->sa_len);
 3398                                 } else
 3399                                         ifr.ifr_ifru.ifru_addr = *sa;
 3400                                 sbuf_bcat(sb, &ifr, sizeof(ifr));
 3401                                 max_len += sizeof(ifr);
 3402                         } else {
 3403                                 sbuf_bcat(sb, &ifr,
 3404                                     offsetof(struct ifreq, ifr_addr));
 3405                                 max_len += offsetof(struct ifreq, ifr_addr);
 3406                                 sbuf_bcat(sb, sa, sa->sa_len);
 3407                                 max_len += sa->sa_len;
 3408                         }
 3409 
 3410                         if (sbuf_error(sb) == 0)
 3411                                 valid_len = sbuf_len(sb);
 3412                 }
 3413                 IF_ADDR_RUNLOCK(ifp);
 3414                 if (addrs == 0) {
 3415                         sbuf_bcat(sb, &ifr, sizeof(ifr));
 3416                         max_len += sizeof(ifr);
 3417 
 3418                         if (sbuf_error(sb) == 0)
 3419                                 valid_len = sbuf_len(sb);
 3420                 }
 3421         }
 3422         IFNET_RUNLOCK();
 3423 
 3424         /*
 3425          * If we didn't allocate enough space (uncommon), try again.  If
 3426          * we have already allocated as much space as we are allowed,
 3427          * return what we've got.
 3428          */
 3429         if (valid_len != max_len && !full) {
 3430                 sbuf_delete(sb);
 3431                 goto again;
 3432         }
 3433 
 3434         ifc->ifc_len = valid_len;
 3435         sbuf_finish(sb);
 3436         error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len);
 3437         sbuf_delete(sb);
 3438         return (error);
 3439 }
 3440 
 3441 /*
 3442  * Just like ifpromisc(), but for all-multicast-reception mode.
 3443  */
 3444 int
 3445 if_allmulti(struct ifnet *ifp, int onswitch)
 3446 {
 3447 
 3448         return (if_setflag(ifp, IFF_ALLMULTI, 0, &ifp->if_amcount, onswitch));
 3449 }
 3450 
 3451 struct ifmultiaddr *
 3452 if_findmulti(struct ifnet *ifp, const struct sockaddr *sa)
 3453 {
 3454         struct ifmultiaddr *ifma;
 3455 
 3456         IF_ADDR_LOCK_ASSERT(ifp);
 3457 
 3458         CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 3459                 if (sa->sa_family == AF_LINK) {
 3460                         if (sa_dl_equal(ifma->ifma_addr, sa))
 3461                                 break;
 3462                 } else {
 3463                         if (sa_equal(ifma->ifma_addr, sa))
 3464                                 break;
 3465                 }
 3466         }
 3467 
 3468         return ifma;
 3469 }
 3470 
 3471 /*
 3472  * Allocate a new ifmultiaddr and initialize based on passed arguments.  We
 3473  * make copies of passed sockaddrs.  The ifmultiaddr will not be added to
 3474  * the ifnet multicast address list here, so the caller must do that and
 3475  * other setup work (such as notifying the device driver).  The reference
 3476  * count is initialized to 1.
 3477  */
 3478 static struct ifmultiaddr *
 3479 if_allocmulti(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr *llsa,
 3480     int mflags)
 3481 {
 3482         struct ifmultiaddr *ifma;
 3483         struct sockaddr *dupsa;
 3484 
 3485         ifma = malloc(sizeof *ifma, M_IFMADDR, mflags |
 3486             M_ZERO);
 3487         if (ifma == NULL)
 3488                 return (NULL);
 3489 
 3490         dupsa = malloc(sa->sa_len, M_IFMADDR, mflags);
 3491         if (dupsa == NULL) {
 3492                 free(ifma, M_IFMADDR);
 3493                 return (NULL);
 3494         }
 3495         bcopy(sa, dupsa, sa->sa_len);
 3496         ifma->ifma_addr = dupsa;
 3497 
 3498         ifma->ifma_ifp = ifp;
 3499         ifma->ifma_refcount = 1;
 3500         ifma->ifma_protospec = NULL;
 3501 
 3502         if (llsa == NULL) {
 3503                 ifma->ifma_lladdr = NULL;
 3504                 return (ifma);
 3505         }
 3506 
 3507         dupsa = malloc(llsa->sa_len, M_IFMADDR, mflags);
 3508         if (dupsa == NULL) {
 3509                 free(ifma->ifma_addr, M_IFMADDR);
 3510                 free(ifma, M_IFMADDR);
 3511                 return (NULL);
 3512         }
 3513         bcopy(llsa, dupsa, llsa->sa_len);
 3514         ifma->ifma_lladdr = dupsa;
 3515 
 3516         return (ifma);
 3517 }
 3518 
 3519 /*
 3520  * if_freemulti: free ifmultiaddr structure and possibly attached related
 3521  * addresses.  The caller is responsible for implementing reference
 3522  * counting, notifying the driver, handling routing messages, and releasing
 3523  * any dependent link layer state.
 3524  */
 3525 #ifdef MCAST_VERBOSE
 3526 extern void kdb_backtrace(void);
 3527 #endif
 3528 static void
 3529 if_freemulti_internal(struct ifmultiaddr *ifma)
 3530 {
 3531 
 3532         KASSERT(ifma->ifma_refcount == 0, ("if_freemulti: refcount %d",
 3533             ifma->ifma_refcount));
 3534 
 3535         if (ifma->ifma_lladdr != NULL)
 3536                 free(ifma->ifma_lladdr, M_IFMADDR);
 3537 #ifdef MCAST_VERBOSE
 3538         kdb_backtrace();
 3539         printf("%s freeing ifma: %p\n", __func__, ifma);
 3540 #endif
 3541         free(ifma->ifma_addr, M_IFMADDR);
 3542         free(ifma, M_IFMADDR);
 3543 }
 3544 
 3545 static void
 3546 if_destroymulti(epoch_context_t ctx)
 3547 {
 3548         struct ifmultiaddr *ifma;
 3549 
 3550         ifma = __containerof(ctx, struct ifmultiaddr, ifma_epoch_ctx);
 3551         if_freemulti_internal(ifma);
 3552 }
 3553 
 3554 void
 3555 if_freemulti(struct ifmultiaddr *ifma)
 3556 {
 3557         KASSERT(ifma->ifma_refcount == 0, ("if_freemulti_epoch: refcount %d",
 3558             ifma->ifma_refcount));
 3559 
 3560         epoch_call(net_epoch_preempt, &ifma->ifma_epoch_ctx, if_destroymulti);
 3561 }
 3562 
 3563 
 3564 /*
 3565  * Register an additional multicast address with a network interface.
 3566  *
 3567  * - If the address is already present, bump the reference count on the
 3568  *   address and return.
 3569  * - If the address is not link-layer, look up a link layer address.
 3570  * - Allocate address structures for one or both addresses, and attach to the
 3571  *   multicast address list on the interface.  If automatically adding a link
 3572  *   layer address, the protocol address will own a reference to the link
 3573  *   layer address, to be freed when it is freed.
 3574  * - Notify the network device driver of an addition to the multicast address
 3575  *   list.
 3576  *
 3577  * 'sa' points to caller-owned memory with the desired multicast address.
 3578  *
 3579  * 'retifma' will be used to return a pointer to the resulting multicast
 3580  * address reference, if desired.
 3581  */
 3582 int
 3583 if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
 3584     struct ifmultiaddr **retifma)
 3585 {
 3586         struct ifmultiaddr *ifma, *ll_ifma;
 3587         struct sockaddr *llsa;
 3588         struct sockaddr_dl sdl;
 3589         int error;
 3590 
 3591 #ifdef INET
 3592         IN_MULTI_LIST_UNLOCK_ASSERT();
 3593 #endif
 3594 #ifdef INET6
 3595         IN6_MULTI_LIST_UNLOCK_ASSERT();
 3596 #endif
 3597         /*
 3598          * If the address is already present, return a new reference to it;
 3599          * otherwise, allocate storage and set up a new address.
 3600          */
 3601         IF_ADDR_WLOCK(ifp);
 3602         ifma = if_findmulti(ifp, sa);
 3603         if (ifma != NULL) {
 3604                 ifma->ifma_refcount++;
 3605                 if (retifma != NULL)
 3606                         *retifma = ifma;
 3607                 IF_ADDR_WUNLOCK(ifp);
 3608                 return (0);
 3609         }
 3610 
 3611         /*
 3612          * The address isn't already present; resolve the protocol address
 3613          * into a link layer address, and then look that up, bump its
 3614          * refcount or allocate an ifma for that also.
 3615          * Most link layer resolving functions returns address data which
 3616          * fits inside default sockaddr_dl structure. However callback
 3617          * can allocate another sockaddr structure, in that case we need to
 3618          * free it later.
 3619          */
 3620         llsa = NULL;
 3621         ll_ifma = NULL;
 3622         if (ifp->if_resolvemulti != NULL) {
 3623                 /* Provide called function with buffer size information */
 3624                 sdl.sdl_len = sizeof(sdl);
 3625                 llsa = (struct sockaddr *)&sdl;
 3626                 error = ifp->if_resolvemulti(ifp, &llsa, sa);
 3627                 if (error)
 3628                         goto unlock_out;
 3629         }
 3630 
 3631         /*
 3632          * Allocate the new address.  Don't hook it up yet, as we may also
 3633          * need to allocate a link layer multicast address.
 3634          */
 3635         ifma = if_allocmulti(ifp, sa, llsa, M_NOWAIT);
 3636         if (ifma == NULL) {
 3637                 error = ENOMEM;
 3638                 goto free_llsa_out;
 3639         }
 3640 
 3641         /*
 3642          * If a link layer address is found, we'll need to see if it's
 3643          * already present in the address list, or allocate is as well.
 3644          * When this block finishes, the link layer address will be on the
 3645          * list.
 3646          */
 3647         if (llsa != NULL) {
 3648                 ll_ifma = if_findmulti(ifp, llsa);
 3649                 if (ll_ifma == NULL) {
 3650                         ll_ifma = if_allocmulti(ifp, llsa, NULL, M_NOWAIT);
 3651                         if (ll_ifma == NULL) {
 3652                                 --ifma->ifma_refcount;
 3653                                 if_freemulti(ifma);
 3654                                 error = ENOMEM;
 3655                                 goto free_llsa_out;
 3656                         }
 3657                         ll_ifma->ifma_flags |= IFMA_F_ENQUEUED;
 3658                         CK_STAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma,
 3659                             ifma_link);
 3660                 } else
 3661                         ll_ifma->ifma_refcount++;
 3662                 ifma->ifma_llifma = ll_ifma;
 3663         }
 3664 
 3665         /*
 3666          * We now have a new multicast address, ifma, and possibly a new or
 3667          * referenced link layer address.  Add the primary address to the
 3668          * ifnet address list.
 3669          */
 3670         ifma->ifma_flags |= IFMA_F_ENQUEUED;
 3671         CK_STAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
 3672 
 3673         if (retifma != NULL)
 3674                 *retifma = ifma;
 3675 
 3676         /*
 3677          * Must generate the message while holding the lock so that 'ifma'
 3678          * pointer is still valid.
 3679          */
 3680         rt_newmaddrmsg(RTM_NEWMADDR, ifma);
 3681         IF_ADDR_WUNLOCK(ifp);
 3682 
 3683         /*
 3684          * We are certain we have added something, so call down to the
 3685          * interface to let them know about it.
 3686          */
 3687         if (ifp->if_ioctl != NULL) {
 3688                 (void) (*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0);
 3689         }
 3690 
 3691         if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl))
 3692                 link_free_sdl(llsa);
 3693 
 3694         return (0);
 3695 
 3696 free_llsa_out:
 3697         if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl))
 3698                 link_free_sdl(llsa);
 3699 
 3700 unlock_out:
 3701         IF_ADDR_WUNLOCK(ifp);
 3702         return (error);
 3703 }
 3704 
 3705 /*
 3706  * Delete a multicast group membership by network-layer group address.
 3707  *
 3708  * Returns ENOENT if the entry could not be found. If ifp no longer
 3709  * exists, results are undefined. This entry point should only be used
 3710  * from subsystems which do appropriate locking to hold ifp for the
 3711  * duration of the call.
 3712  * Network-layer protocol domains must use if_delmulti_ifma().
 3713  */
 3714 int
 3715 if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
 3716 {
 3717         struct ifmultiaddr *ifma;
 3718         int lastref;
 3719 #ifdef INVARIANTS
 3720         struct ifnet *oifp;
 3721 
 3722         IFNET_RLOCK_NOSLEEP();
 3723         CK_STAILQ_FOREACH(oifp, &V_ifnet, if_link)
 3724                 if (ifp == oifp)
 3725                         break;
 3726         if (ifp != oifp)
 3727                 ifp = NULL;
 3728         IFNET_RUNLOCK_NOSLEEP();
 3729 
 3730         KASSERT(ifp != NULL, ("%s: ifnet went away", __func__));
 3731 #endif
 3732         if (ifp == NULL)
 3733                 return (ENOENT);
 3734 
 3735         IF_ADDR_WLOCK(ifp);
 3736         lastref = 0;
 3737         ifma = if_findmulti(ifp, sa);
 3738         if (ifma != NULL)
 3739                 lastref = if_delmulti_locked(ifp, ifma, 0);
 3740         IF_ADDR_WUNLOCK(ifp);
 3741 
 3742         if (ifma == NULL)
 3743                 return (ENOENT);
 3744 
 3745         if (lastref && ifp->if_ioctl != NULL) {
 3746                 (void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
 3747         }
 3748 
 3749         return (0);
 3750 }
 3751 
 3752 /*
 3753  * Delete all multicast group membership for an interface.
 3754  * Should be used to quickly flush all multicast filters.
 3755  */
 3756 void
 3757 if_delallmulti(struct ifnet *ifp)
 3758 {
 3759         struct ifmultiaddr *ifma;
 3760         struct ifmultiaddr *next;
 3761 
 3762         IF_ADDR_WLOCK(ifp);
 3763         CK_STAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
 3764                 if_delmulti_locked(ifp, ifma, 0);
 3765         IF_ADDR_WUNLOCK(ifp);
 3766 }
 3767 
 3768 void
 3769 if_delmulti_ifma(struct ifmultiaddr *ifma)
 3770 {
 3771         if_delmulti_ifma_flags(ifma, 0);
 3772 }
 3773 
 3774 /*
 3775  * Delete a multicast group membership by group membership pointer.
 3776  * Network-layer protocol domains must use this routine.
 3777  *
 3778  * It is safe to call this routine if the ifp disappeared.
 3779  */
 3780 void
 3781 if_delmulti_ifma_flags(struct ifmultiaddr *ifma, int flags)
 3782 {
 3783         struct ifnet *ifp;
 3784         int lastref;
 3785         MCDPRINTF("%s freeing ifma: %p\n", __func__, ifma);
 3786 #ifdef INET
 3787         IN_MULTI_LIST_UNLOCK_ASSERT();
 3788 #endif
 3789         ifp = ifma->ifma_ifp;
 3790 #ifdef DIAGNOSTIC
 3791         if (ifp == NULL) {
 3792                 printf("%s: ifma_ifp seems to be detached\n", __func__);
 3793         } else {
 3794                 struct ifnet *oifp;
 3795 
 3796                 IFNET_RLOCK_NOSLEEP();
 3797                 CK_STAILQ_FOREACH(oifp, &V_ifnet, if_link)
 3798                         if (ifp == oifp)
 3799                                 break;
 3800                 if (ifp != oifp)
 3801                         ifp = NULL;
 3802                 IFNET_RUNLOCK_NOSLEEP();
 3803         }
 3804 #endif
 3805         /*
 3806          * If and only if the ifnet instance exists: Acquire the address lock.
 3807          */
 3808         if (ifp != NULL)
 3809                 IF_ADDR_WLOCK(ifp);
 3810 
 3811         lastref = if_delmulti_locked(ifp, ifma, flags);
 3812 
 3813         if (ifp != NULL) {
 3814                 /*
 3815                  * If and only if the ifnet instance exists:
 3816                  *  Release the address lock.
 3817                  *  If the group was left: update the hardware hash filter.
 3818                  */
 3819                 IF_ADDR_WUNLOCK(ifp);
 3820                 if (lastref && ifp->if_ioctl != NULL) {
 3821                         (void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
 3822                 }
 3823         }
 3824 }
 3825 
 3826 /*
 3827  * Perform deletion of network-layer and/or link-layer multicast address.
 3828  *
 3829  * Return 0 if the reference count was decremented.
 3830  * Return 1 if the final reference was released, indicating that the
 3831  * hardware hash filter should be reprogrammed.
 3832  */
 3833 static int
 3834 if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching)
 3835 {
 3836         struct ifmultiaddr *ll_ifma;
 3837 
 3838         if (ifp != NULL && ifma->ifma_ifp != NULL) {
 3839                 KASSERT(ifma->ifma_ifp == ifp,
 3840                     ("%s: inconsistent ifp %p", __func__, ifp));
 3841                 IF_ADDR_WLOCK_ASSERT(ifp);
 3842         }
 3843 
 3844         ifp = ifma->ifma_ifp;
 3845         MCDPRINTF("%s freeing %p from %s \n", __func__, ifma, ifp ? ifp->if_xname : "");
 3846 
 3847         /*
 3848          * If the ifnet is detaching, null out references to ifnet,
 3849          * so that upper protocol layers will notice, and not attempt
 3850          * to obtain locks for an ifnet which no longer exists. The
 3851          * routing socket announcement must happen before the ifnet
 3852          * instance is detached from the system.
 3853          */
 3854         if (detaching) {
 3855 #ifdef DIAGNOSTIC
 3856                 printf("%s: detaching ifnet instance %p\n", __func__, ifp);
 3857 #endif
 3858                 /*
 3859                  * ifp may already be nulled out if we are being reentered
 3860                  * to delete the ll_ifma.
 3861                  */
 3862                 if (ifp != NULL) {
 3863                         rt_newmaddrmsg(RTM_DELMADDR, ifma);
 3864                         ifma->ifma_ifp = NULL;
 3865                 }
 3866         }
 3867 
 3868         if (--ifma->ifma_refcount > 0)
 3869                 return 0;
 3870 
 3871         if (ifp != NULL && detaching == 0 && (ifma->ifma_flags & IFMA_F_ENQUEUED)) {
 3872                 CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link);
 3873                 ifma->ifma_flags &= ~IFMA_F_ENQUEUED;
 3874         }
 3875         /*
 3876          * If this ifma is a network-layer ifma, a link-layer ifma may
 3877          * have been associated with it. Release it first if so.
 3878          */
 3879         ll_ifma = ifma->ifma_llifma;
 3880         if (ll_ifma != NULL) {
 3881                 KASSERT(ifma->ifma_lladdr != NULL,
 3882                     ("%s: llifma w/o lladdr", __func__));
 3883                 if (detaching)
 3884                         ll_ifma->ifma_ifp = NULL;       /* XXX */
 3885                 if (--ll_ifma->ifma_refcount == 0) {
 3886                         if (ifp != NULL) {
 3887                                 if (ll_ifma->ifma_flags & IFMA_F_ENQUEUED) {
 3888                                         CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr,
 3889                                                 ifma_link);
 3890                                         ll_ifma->ifma_flags &= ~IFMA_F_ENQUEUED;
 3891                                 }
 3892                         }
 3893                         if_freemulti(ll_ifma);
 3894                 }
 3895         }
 3896 #ifdef INVARIANTS
 3897         if (ifp) {
 3898                 struct ifmultiaddr *ifmatmp;
 3899 
 3900                 CK_STAILQ_FOREACH(ifmatmp, &ifp->if_multiaddrs, ifma_link)
 3901                         MPASS(ifma != ifmatmp);
 3902         }
 3903 #endif
 3904         if_freemulti(ifma);
 3905         /*
 3906          * The last reference to this instance of struct ifmultiaddr
 3907          * was released; the hardware should be notified of this change.
 3908          */
 3909         return 1;
 3910 }
 3911 
 3912 /*
 3913  * Set the link layer address on an interface.
 3914  *
 3915  * At this time we only support certain types of interfaces,
 3916  * and we don't allow the length of the address to change.
 3917  *
 3918  * Set noinline to be dtrace-friendly
 3919  */
 3920 __noinline int
 3921 if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
 3922 {
 3923         struct sockaddr_dl *sdl;
 3924         struct ifaddr *ifa;
 3925         struct ifreq ifr;
 3926         int rc;
 3927 
 3928         rc = 0;
 3929         NET_EPOCH_ENTER();
 3930         ifa = ifp->if_addr;
 3931         if (ifa == NULL) {
 3932                 rc = EINVAL;
 3933                 goto out;
 3934         }
 3935 
 3936         sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 3937         if (sdl == NULL) {
 3938                 rc = EINVAL;
 3939                 goto out;
 3940         }
 3941         if (len != sdl->sdl_alen) {     /* don't allow length to change */
 3942                 rc = EINVAL;
 3943                 goto out;
 3944         }
 3945         switch (ifp->if_type) {
 3946         case IFT_ETHER:
 3947         case IFT_XETHER:
 3948         case IFT_L2VLAN:
 3949         case IFT_BRIDGE:
 3950         case IFT_IEEE8023ADLAG:
 3951                 bcopy(lladdr, LLADDR(sdl), len);
 3952                 break;
 3953         default:
 3954                 rc = ENODEV;
 3955                 goto out;
 3956         }
 3957 
 3958         /*
 3959          * If the interface is already up, we need
 3960          * to re-init it in order to reprogram its
 3961          * address filter.
 3962          */
 3963         NET_EPOCH_EXIT();
 3964         if ((ifp->if_flags & IFF_UP) != 0) {
 3965                 if (ifp->if_ioctl) {
 3966                         ifp->if_flags &= ~IFF_UP;
 3967                         ifr.ifr_flags = ifp->if_flags & 0xffff;
 3968                         ifr.ifr_flagshigh = ifp->if_flags >> 16;
 3969                         (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
 3970                         ifp->if_flags |= IFF_UP;
 3971                         ifr.ifr_flags = ifp->if_flags & 0xffff;
 3972                         ifr.ifr_flagshigh = ifp->if_flags >> 16;
 3973                         (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
 3974                 }
 3975         }
 3976         EVENTHANDLER_INVOKE(iflladdr_event, ifp);
 3977         return (0);
 3978  out:
 3979         NET_EPOCH_EXIT();
 3980         return (rc);
 3981 }
 3982 
 3983 /*
 3984  * Compat function for handling basic encapsulation requests.
 3985  * Not converted stacks (FDDI, IB, ..) supports traditional
 3986  * output model: ARP (and other similar L2 protocols) are handled
 3987  * inside output routine, arpresolve/nd6_resolve() returns MAC
 3988  * address instead of full prepend.
 3989  *
 3990  * This function creates calculated header==MAC for IPv4/IPv6 and
 3991  * returns EAFNOSUPPORT (which is then handled in ARP code) for other
 3992  * address families.
 3993  */
 3994 static int
 3995 if_requestencap_default(struct ifnet *ifp, struct if_encap_req *req)
 3996 {
 3997 
 3998         if (req->rtype != IFENCAP_LL)
 3999                 return (EOPNOTSUPP);
 4000 
 4001         if (req->bufsize < req->lladdr_len)
 4002                 return (ENOMEM);
 4003 
 4004         switch (req->family) {
 4005         case AF_INET:
 4006         case AF_INET6:
 4007                 break;
 4008         default:
 4009                 return (EAFNOSUPPORT);
 4010         }
 4011 
 4012         /* Copy lladdr to storage as is */
 4013         memmove(req->buf, req->lladdr, req->lladdr_len);
 4014         req->bufsize = req->lladdr_len;
 4015         req->lladdr_off = 0;
 4016 
 4017         return (0);
 4018 }
 4019 
 4020 /*
 4021  * Tunnel interfaces can nest, also they may cause infinite recursion
 4022  * calls when misconfigured. We'll prevent this by detecting loops.
 4023  * High nesting level may cause stack exhaustion. We'll prevent this
 4024  * by introducing upper limit.
 4025  *
 4026  * Return 0, if tunnel nesting count is equal or less than limit.
 4027  */
 4028 int
 4029 if_tunnel_check_nesting(struct ifnet *ifp, struct mbuf *m, uint32_t cookie,
 4030     int limit)
 4031 {
 4032         struct m_tag *mtag;
 4033         int count;
 4034 
 4035         count = 1;
 4036         mtag = NULL;
 4037         while ((mtag = m_tag_locate(m, cookie, 0, mtag)) != NULL) {
 4038                 if (*(struct ifnet **)(mtag + 1) == ifp) {
 4039                         log(LOG_NOTICE, "%s: loop detected\n", if_name(ifp));
 4040                         return (EIO);
 4041                 }
 4042                 count++;
 4043         }
 4044         if (count > limit) {
 4045                 log(LOG_NOTICE,
 4046                     "%s: if_output recursively called too many times(%d)\n",
 4047                     if_name(ifp), count);
 4048                 return (EIO);
 4049         }
 4050         mtag = m_tag_alloc(cookie, 0, sizeof(struct ifnet *), M_NOWAIT);
 4051         if (mtag == NULL)
 4052                 return (ENOMEM);
 4053         *(struct ifnet **)(mtag + 1) = ifp;
 4054         m_tag_prepend(m, mtag);
 4055         return (0);
 4056 }
 4057 
 4058 /*
 4059  * Get the link layer address that was read from the hardware at attach.
 4060  *
 4061  * This is only set by Ethernet NICs (IFT_ETHER), but laggX interfaces re-type
 4062  * their component interfaces as IFT_IEEE8023ADLAG.
 4063  */
 4064 int
 4065 if_gethwaddr(struct ifnet *ifp, struct ifreq *ifr)
 4066 {
 4067 
 4068         if (ifp->if_hw_addr == NULL)
 4069                 return (ENODEV);
 4070 
 4071         switch (ifp->if_type) {
 4072         case IFT_ETHER:
 4073         case IFT_IEEE8023ADLAG:
 4074                 bcopy(ifp->if_hw_addr, ifr->ifr_addr.sa_data, ifp->if_addrlen);
 4075                 return (0);
 4076         default:
 4077                 return (ENODEV);
 4078         }
 4079 }
 4080 
 4081 /*
 4082  * The name argument must be a pointer to storage which will last as
 4083  * long as the interface does.  For physical devices, the result of
 4084  * device_get_name(dev) is a good choice and for pseudo-devices a
 4085  * static string works well.
 4086  */
 4087 void
 4088 if_initname(struct ifnet *ifp, const char *name, int unit)
 4089 {
 4090         ifp->if_dname = name;
 4091         ifp->if_dunit = unit;
 4092         if (unit != IF_DUNIT_NONE)
 4093                 snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
 4094         else
 4095                 strlcpy(ifp->if_xname, name, IFNAMSIZ);
 4096 }
 4097 
 4098 int
 4099 if_printf(struct ifnet *ifp, const char *fmt, ...)
 4100 {
 4101         char if_fmt[256];
 4102         va_list ap;
 4103 
 4104         snprintf(if_fmt, sizeof(if_fmt), "%s: %s", ifp->if_xname, fmt);
 4105         va_start(ap, fmt);
 4106         vlog(LOG_INFO, if_fmt, ap);
 4107         va_end(ap);
 4108         return (0);
 4109 }
 4110 
 4111 void
 4112 if_start(struct ifnet *ifp)
 4113 {
 4114 
 4115         (*(ifp)->if_start)(ifp);
 4116 }
 4117 
 4118 /*
 4119  * Backwards compatibility interface for drivers 
 4120  * that have not implemented it
 4121  */
 4122 static int
 4123 if_transmit(struct ifnet *ifp, struct mbuf *m)
 4124 {
 4125         int error;
 4126 
 4127         IFQ_HANDOFF(ifp, m, error);
 4128         return (error);
 4129 }
 4130 
 4131 static void
 4132 if_input_default(struct ifnet *ifp __unused, struct mbuf *m)
 4133 {
 4134 
 4135         m_freem(m);
 4136 }
 4137 
 4138 int
 4139 if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust)
 4140 {
 4141         int active = 0;
 4142 
 4143         IF_LOCK(ifq);
 4144         if (_IF_QFULL(ifq)) {
 4145                 IF_UNLOCK(ifq);
 4146                 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
 4147                 m_freem(m);
 4148                 return (0);
 4149         }
 4150         if (ifp != NULL) {
 4151                 if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len + adjust);
 4152                 if (m->m_flags & (M_BCAST|M_MCAST))
 4153                         if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
 4154                 active = ifp->if_drv_flags & IFF_DRV_OACTIVE;
 4155         }
 4156         _IF_ENQUEUE(ifq, m);
 4157         IF_UNLOCK(ifq);
 4158         if (ifp != NULL && !active)
 4159                 (*(ifp)->if_start)(ifp);
 4160         return (1);
 4161 }
 4162 
 4163 void
 4164 if_register_com_alloc(u_char type,
 4165     if_com_alloc_t *a, if_com_free_t *f)
 4166 {
 4167         
 4168         KASSERT(if_com_alloc[type] == NULL,
 4169             ("if_register_com_alloc: %d already registered", type));
 4170         KASSERT(if_com_free[type] == NULL,
 4171             ("if_register_com_alloc: %d free already registered", type));
 4172 
 4173         if_com_alloc[type] = a;
 4174         if_com_free[type] = f;
 4175 }
 4176 
 4177 void
 4178 if_deregister_com_alloc(u_char type)
 4179 {
 4180         
 4181         KASSERT(if_com_alloc[type] != NULL,
 4182             ("if_deregister_com_alloc: %d not registered", type));
 4183         KASSERT(if_com_free[type] != NULL,
 4184             ("if_deregister_com_alloc: %d free not registered", type));
 4185 
 4186         /*
 4187          * Ensure all pending EPOCH(9) callbacks have been executed. This
 4188          * fixes issues about late invocation of if_destroy(), which leads
 4189          * to memory leak from if_com_alloc[type] allocated if_l2com.
 4190          */
 4191         epoch_drain_callbacks(net_epoch_preempt);
 4192 
 4193         if_com_alloc[type] = NULL;
 4194         if_com_free[type] = NULL;
 4195 }
 4196 
 4197 /* API for driver access to network stack owned ifnet.*/
 4198 uint64_t
 4199 if_setbaudrate(struct ifnet *ifp, uint64_t baudrate)
 4200 {
 4201         uint64_t oldbrate;
 4202 
 4203         oldbrate = ifp->if_baudrate;
 4204         ifp->if_baudrate = baudrate;
 4205         return (oldbrate);
 4206 }
 4207 
 4208 uint64_t
 4209 if_getbaudrate(if_t ifp)
 4210 {
 4211 
 4212         return (((struct ifnet *)ifp)->if_baudrate);
 4213 }
 4214 
 4215 int
 4216 if_setcapabilities(if_t ifp, int capabilities)
 4217 {
 4218         ((struct ifnet *)ifp)->if_capabilities = capabilities;
 4219         return (0);
 4220 }
 4221 
 4222 int
 4223 if_setcapabilitiesbit(if_t ifp, int setbit, int clearbit)
 4224 {
 4225         ((struct ifnet *)ifp)->if_capabilities |= setbit;
 4226         ((struct ifnet *)ifp)->if_capabilities &= ~clearbit;
 4227 
 4228         return (0);
 4229 }
 4230 
 4231 int
 4232 if_getcapabilities(if_t ifp)
 4233 {
 4234         return ((struct ifnet *)ifp)->if_capabilities;
 4235 }
 4236 
 4237 int 
 4238 if_setcapenable(if_t ifp, int capabilities)
 4239 {
 4240         ((struct ifnet *)ifp)->if_capenable = capabilities;
 4241         return (0);
 4242 }
 4243 
 4244 int 
 4245 if_setcapenablebit(if_t ifp, int setcap, int clearcap)
 4246 {
 4247         if(setcap) 
 4248                 ((struct ifnet *)ifp)->if_capenable |= setcap;
 4249         if(clearcap)
 4250                 ((struct ifnet *)ifp)->if_capenable &= ~clearcap;
 4251 
 4252         return (0);
 4253 }
 4254 
 4255 const char *
 4256 if_getdname(if_t ifp)
 4257 {
 4258         return ((struct ifnet *)ifp)->if_dname;
 4259 }
 4260 
 4261 int 
 4262 if_togglecapenable(if_t ifp, int togglecap)
 4263 {
 4264         ((struct ifnet *)ifp)->if_capenable ^= togglecap;
 4265         return (0);
 4266 }
 4267 
 4268 int
 4269 if_getcapenable(if_t ifp)
 4270 {
 4271         return ((struct ifnet *)ifp)->if_capenable;
 4272 }
 4273 
 4274 /*
 4275  * This is largely undesirable because it ties ifnet to a device, but does
 4276  * provide flexiblity for an embedded product vendor. Should be used with
 4277  * the understanding that it violates the interface boundaries, and should be
 4278  * a last resort only.
 4279  */
 4280 int
 4281 if_setdev(if_t ifp, void *dev)
 4282 {
 4283         return (0);
 4284 }
 4285 
 4286 int
 4287 if_setdrvflagbits(if_t ifp, int set_flags, int clear_flags)
 4288 {
 4289         ((struct ifnet *)ifp)->if_drv_flags |= set_flags;
 4290         ((struct ifnet *)ifp)->if_drv_flags &= ~clear_flags;
 4291 
 4292         return (0);
 4293 }
 4294 
 4295 int
 4296 if_getdrvflags(if_t ifp)
 4297 {
 4298         return ((struct ifnet *)ifp)->if_drv_flags;
 4299 }
 4300  
 4301 int
 4302 if_setdrvflags(if_t ifp, int flags)
 4303 {
 4304         ((struct ifnet *)ifp)->if_drv_flags = flags;
 4305         return (0);
 4306 }
 4307 
 4308 
 4309 int
 4310 if_setflags(if_t ifp, int flags)
 4311 {
 4312         ((struct ifnet *)ifp)->if_flags = flags;
 4313         return (0);
 4314 }
 4315 
 4316 int
 4317 if_setflagbits(if_t ifp, int set, int clear)
 4318 {
 4319         ((struct ifnet *)ifp)->if_flags |= set;
 4320         ((struct ifnet *)ifp)->if_flags &= ~clear;
 4321 
 4322         return (0);
 4323 }
 4324 
 4325 int
 4326 if_getflags(if_t ifp)
 4327 {
 4328         return ((struct ifnet *)ifp)->if_flags;
 4329 }
 4330 
 4331 int
 4332 if_clearhwassist(if_t ifp)
 4333 {
 4334         ((struct ifnet *)ifp)->if_hwassist = 0;
 4335         return (0);
 4336 }
 4337 
 4338 int
 4339 if_sethwassistbits(if_t ifp, int toset, int toclear)
 4340 {
 4341         ((struct ifnet *)ifp)->if_hwassist |= toset;
 4342         ((struct ifnet *)ifp)->if_hwassist &= ~toclear;
 4343 
 4344         return (0);
 4345 }
 4346 
 4347 int
 4348 if_sethwassist(if_t ifp, int hwassist_bit)
 4349 {
 4350         ((struct ifnet *)ifp)->if_hwassist = hwassist_bit;
 4351         return (0);
 4352 }
 4353 
 4354 int
 4355 if_gethwassist(if_t ifp)
 4356 {
 4357         return ((struct ifnet *)ifp)->if_hwassist;
 4358 }
 4359 
 4360 int
 4361 if_setmtu(if_t ifp, int mtu)
 4362 {
 4363         ((struct ifnet *)ifp)->if_mtu = mtu;
 4364         return (0);
 4365 }
 4366 
 4367 int
 4368 if_getmtu(if_t ifp)
 4369 {
 4370         return ((struct ifnet *)ifp)->if_mtu;
 4371 }
 4372 
 4373 int
 4374 if_getmtu_family(if_t ifp, int family)
 4375 {
 4376         struct domain *dp;
 4377 
 4378         for (dp = domains; dp; dp = dp->dom_next) {
 4379                 if (dp->dom_family == family && dp->dom_ifmtu != NULL)
 4380                         return (dp->dom_ifmtu((struct ifnet *)ifp));
 4381         }
 4382 
 4383         return (((struct ifnet *)ifp)->if_mtu);
 4384 }
 4385 
 4386 /*
 4387  * Methods for drivers to access interface unicast and multicast
 4388  * link level addresses.  Driver shall not know 'struct ifaddr' neither
 4389  * 'struct ifmultiaddr'.
 4390  */
 4391 u_int
 4392 if_foreach_lladdr(if_t ifp, iflladdr_cb_t cb, void *cb_arg)
 4393 {
 4394         struct ifaddr *ifa;
 4395         u_int count;
 4396 
 4397         MPASS(cb);
 4398 
 4399         count = 0;
 4400         IF_ADDR_RLOCK(ifp);
 4401         CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 4402                 if (ifa->ifa_addr->sa_family != AF_LINK)
 4403                         continue;
 4404                 count += (*cb)(cb_arg, (struct sockaddr_dl *)ifa->ifa_addr,
 4405                     count);
 4406         }
 4407         IF_ADDR_RUNLOCK(ifp);
 4408 
 4409         return (count);
 4410 }
 4411 
 4412 u_int
 4413 if_foreach_llmaddr(if_t ifp, iflladdr_cb_t cb, void *cb_arg)
 4414 {
 4415         struct ifmultiaddr *ifma;
 4416         u_int count;
 4417 
 4418         MPASS(cb);
 4419 
 4420         count = 0;
 4421         IF_ADDR_RLOCK(ifp);
 4422         CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 4423                 if (ifma->ifma_addr->sa_family != AF_LINK)
 4424                         continue;
 4425                 count += (*cb)(cb_arg, (struct sockaddr_dl *)ifma->ifma_addr,
 4426                     count);
 4427         }
 4428         IF_ADDR_RUNLOCK(ifp);
 4429 
 4430         return (count);
 4431 }
 4432 
 4433 int
 4434 if_setsoftc(if_t ifp, void *softc)
 4435 {
 4436         ((struct ifnet *)ifp)->if_softc = softc;
 4437         return (0);
 4438 }
 4439 
 4440 void *
 4441 if_getsoftc(if_t ifp)
 4442 {
 4443         return ((struct ifnet *)ifp)->if_softc;
 4444 }
 4445 
 4446 void 
 4447 if_setrcvif(struct mbuf *m, if_t ifp)
 4448 {
 4449         m->m_pkthdr.rcvif = (struct ifnet *)ifp;
 4450 }
 4451 
 4452 void 
 4453 if_setvtag(struct mbuf *m, uint16_t tag)
 4454 {
 4455         m->m_pkthdr.ether_vtag = tag;   
 4456 }
 4457 
 4458 uint16_t
 4459 if_getvtag(struct mbuf *m)
 4460 {
 4461 
 4462         return (m->m_pkthdr.ether_vtag);
 4463 }
 4464 
 4465 int
 4466 if_sendq_empty(if_t ifp)
 4467 {
 4468         return IFQ_DRV_IS_EMPTY(&((struct ifnet *)ifp)->if_snd);
 4469 }
 4470 
 4471 struct ifaddr *
 4472 if_getifaddr(if_t ifp)
 4473 {
 4474         return ((struct ifnet *)ifp)->if_addr;
 4475 }
 4476 
 4477 int
 4478 if_getamcount(if_t ifp)
 4479 {
 4480         return ((struct ifnet *)ifp)->if_amcount;
 4481 }
 4482 
 4483 
 4484 int
 4485 if_setsendqready(if_t ifp)
 4486 {
 4487         IFQ_SET_READY(&((struct ifnet *)ifp)->if_snd);
 4488         return (0);
 4489 }
 4490 
 4491 int
 4492 if_setsendqlen(if_t ifp, int tx_desc_count)
 4493 {
 4494         IFQ_SET_MAXLEN(&((struct ifnet *)ifp)->if_snd, tx_desc_count);
 4495         ((struct ifnet *)ifp)->if_snd.ifq_drv_maxlen = tx_desc_count;
 4496 
 4497         return (0);
 4498 }
 4499 
 4500 int
 4501 if_vlantrunkinuse(if_t ifp)
 4502 {
 4503         return ((struct ifnet *)ifp)->if_vlantrunk != NULL?1:0;
 4504 }
 4505 
 4506 int
 4507 if_input(if_t ifp, struct mbuf* sendmp)
 4508 {
 4509         (*((struct ifnet *)ifp)->if_input)((struct ifnet *)ifp, sendmp);
 4510         return (0);
 4511 
 4512 }
 4513 
 4514 /* XXX */
 4515 #ifndef ETH_ADDR_LEN
 4516 #define ETH_ADDR_LEN 6
 4517 #endif
 4518 
 4519 int 
 4520 if_setupmultiaddr(if_t ifp, void *mta, int *cnt, int max)
 4521 {
 4522         struct ifmultiaddr *ifma;
 4523         uint8_t *lmta = (uint8_t *)mta;
 4524         int mcnt = 0;
 4525 
 4526         CK_STAILQ_FOREACH(ifma, &((struct ifnet *)ifp)->if_multiaddrs, ifma_link) {
 4527                 if (ifma->ifma_addr->sa_family != AF_LINK)
 4528                         continue;
 4529 
 4530                 if (mcnt == max)
 4531                         break;
 4532 
 4533                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
 4534                     &lmta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
 4535                 mcnt++;
 4536         }
 4537         *cnt = mcnt;
 4538 
 4539         return (0);
 4540 }
 4541 
 4542 int
 4543 if_multiaddr_array(if_t ifp, void *mta, int *cnt, int max)
 4544 {
 4545         int error;
 4546 
 4547         if_maddr_rlock(ifp);
 4548         error = if_setupmultiaddr(ifp, mta, cnt, max);
 4549         if_maddr_runlock(ifp);
 4550         return (error);
 4551 }
 4552 
 4553 int
 4554 if_multiaddr_count(if_t ifp, int max)
 4555 {
 4556         struct ifmultiaddr *ifma;
 4557         int count;
 4558 
 4559         count = 0;
 4560         if_maddr_rlock(ifp);
 4561         CK_STAILQ_FOREACH(ifma, &((struct ifnet *)ifp)->if_multiaddrs, ifma_link) {
 4562                 if (ifma->ifma_addr->sa_family != AF_LINK)
 4563                         continue;
 4564                 count++;
 4565                 if (count == max)
 4566                         break;
 4567         }
 4568         if_maddr_runlock(ifp);
 4569         return (count);
 4570 }
 4571 
 4572 int
 4573 if_multi_apply(struct ifnet *ifp, int (*filter)(void *, struct ifmultiaddr *, int), void *arg)
 4574 {
 4575         struct ifmultiaddr *ifma;
 4576         int cnt = 0;
 4577 
 4578         if_maddr_rlock(ifp);
 4579         CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
 4580                 cnt += filter(arg, ifma, cnt);
 4581         if_maddr_runlock(ifp);
 4582         return (cnt);
 4583 }
 4584 
 4585 struct mbuf *
 4586 if_dequeue(if_t ifp)
 4587 {
 4588         struct mbuf *m;
 4589         IFQ_DRV_DEQUEUE(&((struct ifnet *)ifp)->if_snd, m);
 4590 
 4591         return (m);
 4592 }
 4593 
 4594 int
 4595 if_sendq_prepend(if_t ifp, struct mbuf *m)
 4596 {
 4597         IFQ_DRV_PREPEND(&((struct ifnet *)ifp)->if_snd, m);
 4598         return (0);
 4599 }
 4600 
 4601 int
 4602 if_setifheaderlen(if_t ifp, int len)
 4603 {
 4604         ((struct ifnet *)ifp)->if_hdrlen = len;
 4605         return (0);
 4606 }
 4607 
 4608 caddr_t
 4609 if_getlladdr(if_t ifp)
 4610 {
 4611         return (IF_LLADDR((struct ifnet *)ifp));
 4612 }
 4613 
 4614 void *
 4615 if_gethandle(u_char type)
 4616 {
 4617         return (if_alloc(type));
 4618 }
 4619 
 4620 void
 4621 if_bpfmtap(if_t ifh, struct mbuf *m)
 4622 {
 4623         struct ifnet *ifp = (struct ifnet *)ifh;
 4624 
 4625         BPF_MTAP(ifp, m);
 4626 }
 4627 
 4628 void
 4629 if_etherbpfmtap(if_t ifh, struct mbuf *m)
 4630 {
 4631         struct ifnet *ifp = (struct ifnet *)ifh;
 4632 
 4633         ETHER_BPF_MTAP(ifp, m);
 4634 }
 4635 
 4636 void
 4637 if_vlancap(if_t ifh)
 4638 {
 4639         struct ifnet *ifp = (struct ifnet *)ifh;
 4640         VLAN_CAPABILITIES(ifp);
 4641 }
 4642 
 4643 int
 4644 if_sethwtsomax(if_t ifp, u_int if_hw_tsomax)
 4645 {
 4646 
 4647         ((struct ifnet *)ifp)->if_hw_tsomax = if_hw_tsomax;
 4648         return (0);
 4649 }
 4650 
 4651 int
 4652 if_sethwtsomaxsegcount(if_t ifp, u_int if_hw_tsomaxsegcount)
 4653 {
 4654 
 4655         ((struct ifnet *)ifp)->if_hw_tsomaxsegcount = if_hw_tsomaxsegcount;
 4656         return (0);
 4657 }
 4658 
 4659 int
 4660 if_sethwtsomaxsegsize(if_t ifp, u_int if_hw_tsomaxsegsize)
 4661 {
 4662 
 4663         ((struct ifnet *)ifp)->if_hw_tsomaxsegsize = if_hw_tsomaxsegsize;
 4664         return (0);
 4665 }
 4666 
 4667 u_int
 4668 if_gethwtsomax(if_t ifp)
 4669 {
 4670 
 4671         return (((struct ifnet *)ifp)->if_hw_tsomax);
 4672 }
 4673 
 4674 u_int
 4675 if_gethwtsomaxsegcount(if_t ifp)
 4676 {
 4677 
 4678         return (((struct ifnet *)ifp)->if_hw_tsomaxsegcount);
 4679 }
 4680 
 4681 u_int
 4682 if_gethwtsomaxsegsize(if_t ifp)
 4683 {
 4684 
 4685         return (((struct ifnet *)ifp)->if_hw_tsomaxsegsize);
 4686 }
 4687 
 4688 void
 4689 if_setinitfn(if_t ifp, void (*init_fn)(void *))
 4690 {
 4691         ((struct ifnet *)ifp)->if_init = init_fn;
 4692 }
 4693 
 4694 void
 4695 if_setioctlfn(if_t ifp, int (*ioctl_fn)(if_t, u_long, caddr_t))
 4696 {
 4697         ((struct ifnet *)ifp)->if_ioctl = (void *)ioctl_fn;
 4698 }
 4699 
 4700 void
 4701 if_setstartfn(if_t ifp, void (*start_fn)(if_t))
 4702 {
 4703         ((struct ifnet *)ifp)->if_start = (void *)start_fn;
 4704 }
 4705 
 4706 void
 4707 if_settransmitfn(if_t ifp, if_transmit_fn_t start_fn)
 4708 {
 4709         ((struct ifnet *)ifp)->if_transmit = start_fn;
 4710 }
 4711 
 4712 void if_setqflushfn(if_t ifp, if_qflush_fn_t flush_fn)
 4713 {
 4714         ((struct ifnet *)ifp)->if_qflush = flush_fn;
 4715         
 4716 }
 4717 
 4718 void
 4719 if_setgetcounterfn(if_t ifp, if_get_counter_t fn)
 4720 {
 4721 
 4722         ifp->if_get_counter = fn;
 4723 }
 4724 
 4725 /* Revisit these - These are inline functions originally. */
 4726 int
 4727 drbr_inuse_drv(if_t ifh, struct buf_ring *br)
 4728 {
 4729         return drbr_inuse(ifh, br);
 4730 }
 4731 
 4732 struct mbuf*
 4733 drbr_dequeue_drv(if_t ifh, struct buf_ring *br)
 4734 {
 4735         return drbr_dequeue(ifh, br);
 4736 }
 4737 
 4738 int
 4739 drbr_needs_enqueue_drv(if_t ifh, struct buf_ring *br)
 4740 {
 4741         return drbr_needs_enqueue(ifh, br);
 4742 }
 4743 
 4744 int
 4745 drbr_enqueue_drv(if_t ifh, struct buf_ring *br, struct mbuf *m)
 4746 {
 4747         return drbr_enqueue(ifh, br, m);
 4748 
 4749 }

Cache object: ba4684e9122ceecf667ce3ce5cfe44a6


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.