FreeBSD/Linux Kernel Cross Reference
sys/net/if.c


    1 /*      $NetBSD: if.c,v 1.528 2022/11/25 06:18:42 msaitoh Exp $ */
    2 
    3 /*-
    4  * Copyright (c) 1999, 2000, 2001, 2008 The NetBSD Foundation, Inc.
    5  * All rights reserved.
    6  *
    7  * This code is derived from software contributed to The NetBSD Foundation
    8  * by William Studenmund and Jason R. Thorpe.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   29  * POSSIBILITY OF SUCH DAMAGE.
   30  */
   31 
   32 /*
   33  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
   34  * All rights reserved.
   35  *
   36  * Redistribution and use in source and binary forms, with or without
   37  * modification, are permitted provided that the following conditions
   38  * are met:
   39  * 1. Redistributions of source code must retain the above copyright
   40  *    notice, this list of conditions and the following disclaimer.
   41  * 2. Redistributions in binary form must reproduce the above copyright
   42  *    notice, this list of conditions and the following disclaimer in the
   43  *    documentation and/or other materials provided with the distribution.
   44  * 3. Neither the name of the project nor the names of its contributors
   45  *    may be used to endorse or promote products derived from this software
   46  *    without specific prior written permission.
   47  *
   48  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
   49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
   52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   58  * SUCH DAMAGE.
   59  */
   60 
   61 /*
   62  * Copyright (c) 1980, 1986, 1993
   63  *      The Regents of the University of California.  All rights reserved.
   64  *
   65  * Redistribution and use in source and binary forms, with or without
   66  * modification, are permitted provided that the following conditions
   67  * are met:
   68  * 1. Redistributions of source code must retain the above copyright
   69  *    notice, this list of conditions and the following disclaimer.
   70  * 2. Redistributions in binary form must reproduce the above copyright
   71  *    notice, this list of conditions and the following disclaimer in the
   72  *    documentation and/or other materials provided with the distribution.
   73  * 3. Neither the name of the University nor the names of its contributors
   74  *    may be used to endorse or promote products derived from this software
   75  *    without specific prior written permission.
   76  *
   77  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   78  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   79  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   80  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   81  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   82  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   83  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   84  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   85  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   86  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   87  * SUCH DAMAGE.
   88  *
   89  *      @(#)if.c        8.5 (Berkeley) 1/9/95
   90  */
   91 
   92 #include <sys/cdefs.h>
   93 __KERNEL_RCSID(0, "$NetBSD: if.c,v 1.528 2022/11/25 06:18:42 msaitoh Exp $");
   94 
   95 #if defined(_KERNEL_OPT)
   96 #include "opt_inet.h"
   97 #include "opt_ipsec.h"
   98 #include "opt_atalk.h"
   99 #include "opt_wlan.h"
  100 #include "opt_net_mpsafe.h"
  101 #include "opt_mrouting.h"
  102 #endif
  103 
  104 #include <sys/param.h>
  105 #include <sys/mbuf.h>
  106 #include <sys/systm.h>
  107 #include <sys/callout.h>
  108 #include <sys/proc.h>
  109 #include <sys/socket.h>
  110 #include <sys/socketvar.h>
  111 #include <sys/domain.h>
  112 #include <sys/protosw.h>
  113 #include <sys/kernel.h>
  114 #include <sys/ioctl.h>
  115 #include <sys/sysctl.h>
  116 #include <sys/syslog.h>
  117 #include <sys/kauth.h>
  118 #include <sys/kmem.h>
  119 #include <sys/xcall.h>
  120 #include <sys/cpu.h>
  121 #include <sys/intr.h>
  122 #include <sys/module_hook.h>
  123 #include <sys/compat_stub.h>
  124 #include <sys/msan.h>
  125 #include <sys/hook.h>
  126 
  127 #include <net/if.h>
  128 #include <net/if_dl.h>
  129 #include <net/if_ether.h>
  130 #include <net/if_media.h>
  131 #include <net80211/ieee80211.h>
  132 #include <net80211/ieee80211_ioctl.h>
  133 #include <net/if_types.h>
  134 #include <net/route.h>
  135 #include <sys/module.h>
  136 #ifdef NETATALK
  137 #include <netatalk/at_extern.h>
  138 #include <netatalk/at.h>
  139 #endif
  140 #include <net/pfil.h>
  141 #include <netinet/in.h>
  142 #include <netinet/in_var.h>
  143 #include <netinet/ip_encap.h>
  144 #include <net/bpf.h>
  145 
  146 #ifdef INET6
  147 #include <netinet6/in6_var.h>
  148 #include <netinet6/nd6.h>
  149 #endif
  150 
  151 #include "ether.h"
  152 
  153 #include "bridge.h"
  154 #if NBRIDGE > 0
  155 #include <net/if_bridgevar.h>
  156 #endif
  157 
  158 #include "carp.h"
  159 #if NCARP > 0
  160 #include <netinet/ip_carp.h>
  161 #endif
  162 
  163 #include <compat/sys/sockio.h>
  164 
  165 MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
  166 MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
  167 
  168 /*
  169  * XXX reusing (ifp)->if_snd.ifq_lock rather than having another spin mutex
  170  * for each ifnet.  It doesn't matter because:
  171  * - if IFEF_MPSAFE is enabled, if_snd isn't used and lock contentions on
  172  *   ifq_lock don't happen
  173  * - if IFEF_MPSAFE is disabled, there is no lock contention on ifq_lock
  174  *   because if_snd, if_link_state_change and if_link_state_change_process
  175  *   are all called with KERNEL_LOCK
  176  */
  177 #define IF_LINK_STATE_CHANGE_LOCK(ifp)          \
  178         mutex_enter((ifp)->if_snd.ifq_lock)
  179 #define IF_LINK_STATE_CHANGE_UNLOCK(ifp)        \
  180         mutex_exit((ifp)->if_snd.ifq_lock)
  181 
  182 /*
  183  * Global list of interfaces.
  184  */
  185 /* DEPRECATED. Remove it once kvm(3) users have disappeared */
  186 struct ifnet_head               ifnet_list;
  187 
  188 struct pslist_head              ifnet_pslist;
  189 static ifnet_t **               ifindex2ifnet = NULL;
  190 static u_int                    if_index = 1;
  191 static size_t                   if_indexlim = 0;
  192 static uint64_t                 index_gen;
  193 /* Mutex to protect the above objects. */
  194 kmutex_t                        ifnet_mtx __cacheline_aligned;
  195 static struct psref_class       *ifnet_psref_class __read_mostly;
  196 static pserialize_t             ifnet_psz;
  197 static struct workqueue         *ifnet_link_state_wq __read_mostly;
  198 
  199 static struct workqueue         *if_slowtimo_wq __read_mostly;
  200 
  201 static kmutex_t                 if_clone_mtx;
  202 
  203 struct ifnet *lo0ifp;
  204 int     ifqmaxlen = IFQ_MAXLEN;
  205 
  206 struct psref_class              *ifa_psref_class __read_mostly;
  207 
  208 static int      if_delroute_matcher(struct rtentry *, void *);
  209 
  210 static bool if_is_unit(const char *);
  211 static struct if_clone *if_clone_lookup(const char *, int *);
  212 
  213 static LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);
  214 static int if_cloners_count;
  215 
  216 /* Packet filtering hook for interfaces. */
  217 pfil_head_t *                   if_pfil __read_mostly;
  218 
  219 static kauth_listener_t if_listener;
  220 
  221 static int doifioctl(struct socket *, u_long, void *, struct lwp *);
  222 static void sysctl_sndq_setup(struct sysctllog **, const char *,
  223     struct ifaltq *);
  224 static void if_slowtimo_intr(void *);
  225 static void if_slowtimo_work(struct work *, void *);
  226 static int sysctl_if_watchdog(SYSCTLFN_PROTO);
  227 static void sysctl_watchdog_setup(struct ifnet *);
  228 static void if_attachdomain1(struct ifnet *);
  229 static int ifconf(u_long, void *);
  230 static int if_transmit(struct ifnet *, struct mbuf *);
  231 static int if_clone_create(const char *);
  232 static int if_clone_destroy(const char *);
  233 static void if_link_state_change_work(struct work *, void *);
  234 static void if_up_locked(struct ifnet *);
  235 static void _if_down(struct ifnet *);
  236 static void if_down_deactivated(struct ifnet *);
  237 
  238 struct if_percpuq {
  239         struct ifnet    *ipq_ifp;
  240         void            *ipq_si;
  241         struct percpu   *ipq_ifqs;      /* struct ifqueue */
  242 };
  243 
  244 static struct mbuf *if_percpuq_dequeue(struct if_percpuq *);
  245 
  246 static void if_percpuq_drops(void *, void *, struct cpu_info *);
  247 static int sysctl_percpuq_drops_handler(SYSCTLFN_PROTO);
  248 static void sysctl_percpuq_setup(struct sysctllog **, const char *,
  249     struct if_percpuq *);
  250 
  251 struct if_deferred_start {
  252         struct ifnet    *ids_ifp;
  253         void            (*ids_if_start)(struct ifnet *);
  254         void            *ids_si;
  255 };
  256 
  257 static void if_deferred_start_softint(void *);
  258 static void if_deferred_start_common(struct ifnet *);
  259 static void if_deferred_start_destroy(struct ifnet *);
  260 
  261 struct if_slowtimo_data {
  262         kmutex_t                isd_lock;
  263         struct callout          isd_ch;
  264         struct work             isd_work;
  265         struct ifnet            *isd_ifp;
  266         bool                    isd_queued;
  267         bool                    isd_dying;
  268         bool                    isd_trigger;
  269 };
  270 
  271 /*
  272  * Hook for if_vlan - needed by if_agr
  273  */
  274 struct if_vlan_vlan_input_hook_t if_vlan_vlan_input_hook;
  275 
  276 static void if_sysctl_setup(struct sysctllog **);
  277 
  278 static int
  279 if_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
  280     void *arg0, void *arg1, void *arg2, void *arg3)
  281 {
  282         int result;
  283         enum kauth_network_req req;
  284 
  285         result = KAUTH_RESULT_DEFER;
  286         req = (enum kauth_network_req)(uintptr_t)arg1;
  287 
  288         if (action != KAUTH_NETWORK_INTERFACE)
  289                 return result;
  290 
  291         if ((req == KAUTH_REQ_NETWORK_INTERFACE_GET) ||
  292             (req == KAUTH_REQ_NETWORK_INTERFACE_SET))
  293                 result = KAUTH_RESULT_ALLOW;
  294 
  295         return result;
  296 }
  297 
  298 /*
  299  * Network interface utility routines.
  300  *
  301  * Routines with ifa_ifwith* names take sockaddr *'s as
  302  * parameters.
  303  */
  304 void
  305 ifinit(void)
  306 {
  307 
  308 #if (defined(INET) || defined(INET6))
  309         encapinit();
  310 #endif
  311 
  312         if_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK,
  313             if_listener_cb, NULL);
  314 
  315         /* interfaces are available, inform socket code */
  316         ifioctl = doifioctl;
  317 }
  318 
  319 /*
  320  * XXX Initialization before configure().
  321  * XXX hack to get pfil_add_hook working in autoconf.
  322  */
  323 void
  324 ifinit1(void)
  325 {
  326         int error __diagused;
  327 
  328 #ifdef NET_MPSAFE
  329         printf("NET_MPSAFE enabled\n");
  330 #endif
  331 
  332         mutex_init(&if_clone_mtx, MUTEX_DEFAULT, IPL_NONE);
  333 
  334         TAILQ_INIT(&ifnet_list);
  335         mutex_init(&ifnet_mtx, MUTEX_DEFAULT, IPL_NONE);
  336         ifnet_psz = pserialize_create();
  337         ifnet_psref_class = psref_class_create("ifnet", IPL_SOFTNET);
  338         ifa_psref_class = psref_class_create("ifa", IPL_SOFTNET);
  339         error = workqueue_create(&ifnet_link_state_wq, "iflnkst",
  340             if_link_state_change_work, NULL, PRI_SOFTNET, IPL_NET,
  341             WQ_MPSAFE);
  342         KASSERT(error == 0);
  343         PSLIST_INIT(&ifnet_pslist);
  344 
  345         error = workqueue_create(&if_slowtimo_wq, "ifwdog",
  346             if_slowtimo_work, NULL, PRI_SOFTNET, IPL_SOFTCLOCK, WQ_MPSAFE);
  347         KASSERTMSG(error == 0, "error=%d", error);
  348 
  349         if_indexlim = 8;
  350 
  351         if_pfil = pfil_head_create(PFIL_TYPE_IFNET, NULL);
  352         KASSERT(if_pfil != NULL);
  353 
  354 #if NETHER > 0 || defined(NETATALK) || defined(WLAN)
  355         etherinit();
  356 #endif
  357 }
  358 
  359 /* XXX must be after domaininit() */
  360 void
  361 ifinit_post(void)
  362 {
  363 
  364         if_sysctl_setup(NULL);
  365 }
  366 
  367 ifnet_t *
  368 if_alloc(u_char type)
  369 {
  370 
  371         return kmem_zalloc(sizeof(ifnet_t), KM_SLEEP);
  372 }
  373 
  374 void
  375 if_free(ifnet_t *ifp)
  376 {
  377 
  378         kmem_free(ifp, sizeof(ifnet_t));
  379 }
  380 
  381 void
  382 if_initname(struct ifnet *ifp, const char *name, int unit)
  383 {
  384 
  385         (void)snprintf(ifp->if_xname, sizeof(ifp->if_xname),
  386             "%s%d", name, unit);
  387 }
  388 
  389 /*
  390  * Null routines used while an interface is going away.  These routines
  391  * just return an error.
  392  */
  393 
  394 int
  395 if_nulloutput(struct ifnet *ifp, struct mbuf *m,
  396     const struct sockaddr *so, const struct rtentry *rt)
  397 {
  398 
  399         return ENXIO;
  400 }
  401 
  402 void
  403 if_nullinput(struct ifnet *ifp, struct mbuf *m)
  404 {
  405 
  406         /* Nothing. */
  407 }
  408 
  409 void
  410 if_nullstart(struct ifnet *ifp)
  411 {
  412 
  413         /* Nothing. */
  414 }
  415 
  416 int
  417 if_nulltransmit(struct ifnet *ifp, struct mbuf *m)
  418 {
  419 
  420         m_freem(m);
  421         return ENXIO;
  422 }
  423 
  424 int
  425 if_nullioctl(struct ifnet *ifp, u_long cmd, void *data)
  426 {
  427 
  428         return ENXIO;
  429 }
  430 
  431 int
  432 if_nullinit(struct ifnet *ifp)
  433 {
  434 
  435         return ENXIO;
  436 }
  437 
  438 void
  439 if_nullstop(struct ifnet *ifp, int disable)
  440 {
  441 
  442         /* Nothing. */
  443 }
  444 
  445 void
  446 if_nullslowtimo(struct ifnet *ifp)
  447 {
  448 
  449         /* Nothing. */
  450 }
  451 
  452 void
  453 if_nulldrain(struct ifnet *ifp)
  454 {
  455 
  456         /* Nothing. */
  457 }
  458 
  459 void
  460 if_set_sadl(struct ifnet *ifp, const void *lla, u_char addrlen, bool factory)
  461 {
  462         struct ifaddr *ifa;
  463         struct sockaddr_dl *sdl;
  464 
  465         ifp->if_addrlen = addrlen;
  466         if_alloc_sadl(ifp);
  467         ifa = ifp->if_dl;
  468         sdl = satosdl(ifa->ifa_addr);
  469 
  470         (void)sockaddr_dl_setaddr(sdl, sdl->sdl_len, lla, ifp->if_addrlen);
  471         if (factory) {
  472                 KASSERT(ifp->if_hwdl == NULL);
  473                 ifp->if_hwdl = ifp->if_dl;
  474                 ifaref(ifp->if_hwdl);
  475         }
  476         /* TBD routing socket */
  477 }
  478 
  479 struct ifaddr *
  480 if_dl_create(const struct ifnet *ifp, const struct sockaddr_dl **sdlp)
  481 {
  482         unsigned socksize, ifasize;
  483         int addrlen, namelen;
  484         struct sockaddr_dl *mask, *sdl;
  485         struct ifaddr *ifa;
  486 
  487         namelen = strlen(ifp->if_xname);
  488         addrlen = ifp->if_addrlen;
  489         socksize = roundup(sockaddr_dl_measure(namelen, addrlen),
  490             sizeof(long));
  491         ifasize = sizeof(*ifa) + 2 * socksize;
  492         ifa = malloc(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
  493 
  494         sdl = (struct sockaddr_dl *)(ifa + 1);
  495         mask = (struct sockaddr_dl *)(socksize + (char *)sdl);
  496 
  497         sockaddr_dl_init(sdl, socksize, ifp->if_index, ifp->if_type,
  498             ifp->if_xname, namelen, NULL, addrlen);
  499         mask->sdl_family = AF_LINK;
  500         mask->sdl_len = sockaddr_dl_measure(namelen, 0);
  501         memset(&mask->sdl_data[0], 0xff, namelen);
  502         ifa->ifa_rtrequest = link_rtrequest;
  503         ifa->ifa_addr = (struct sockaddr *)sdl;
  504         ifa->ifa_netmask = (struct sockaddr *)mask;
  505         ifa_psref_init(ifa);
  506 
  507         *sdlp = sdl;
  508 
  509         return ifa;
  510 }
  511 
  512 static void
  513 if_sadl_setrefs(struct ifnet *ifp, struct ifaddr *ifa)
  514 {
  515         const struct sockaddr_dl *sdl;
  516 
  517         ifp->if_dl = ifa;
  518         ifaref(ifa);
  519         sdl = satosdl(ifa->ifa_addr);
  520         ifp->if_sadl = sdl;
  521 }
  522 
  523 /*
  524  * Allocate the link level name for the specified interface.  This
  525  * is an attachment helper.  It must be called after ifp->if_addrlen
  526  * is initialized, which may not be the case when if_attach() is
  527  * called.
  528  */
  529 void
  530 if_alloc_sadl(struct ifnet *ifp)
  531 {
  532         struct ifaddr *ifa;
  533         const struct sockaddr_dl *sdl;
  534 
  535         /*
  536          * If the interface already has a link name, release it
  537          * now.  This is useful for interfaces that can change
  538          * link types, and thus switch link names often.
  539          */
  540         if (ifp->if_sadl != NULL)
  541                 if_free_sadl(ifp, 0);
  542 
  543         ifa = if_dl_create(ifp, &sdl);
  544 
  545         ifa_insert(ifp, ifa);
  546         if_sadl_setrefs(ifp, ifa);
  547 }
  548 
  549 static void
  550 if_deactivate_sadl(struct ifnet *ifp)
  551 {
  552         struct ifaddr *ifa;
  553 
  554         KASSERT(ifp->if_dl != NULL);
  555 
  556         ifa = ifp->if_dl;
  557 
  558         ifp->if_sadl = NULL;
  559 
  560         ifp->if_dl = NULL;
  561         ifafree(ifa);
  562 }
  563 
  564 static void
  565 if_replace_sadl(struct ifnet *ifp, struct ifaddr *ifa)
  566 {
  567         struct ifaddr *old;
  568 
  569         KASSERT(ifp->if_dl != NULL);
  570 
  571         old = ifp->if_dl;
  572 
  573         ifaref(ifa);
  574         /* XXX Update if_dl and if_sadl atomically */
  575         ifp->if_dl = ifa;
  576         ifp->if_sadl = satosdl(ifa->ifa_addr);
  577 
  578         ifafree(old);
  579 }
  580 
  581 void
  582 if_activate_sadl(struct ifnet *ifp, struct ifaddr *ifa0,
  583     const struct sockaddr_dl *sdl)
  584 {
  585         struct ifaddr *ifa;
  586         const int bound = curlwp_bind();
  587 
  588         KASSERT(ifa_held(ifa0));
  589 
  590         const int s = splsoftnet();
  591 
  592         if_replace_sadl(ifp, ifa0);
  593 
  594         int ss = pserialize_read_enter();
  595         IFADDR_READER_FOREACH(ifa, ifp) {
  596                 struct psref psref;
  597                 ifa_acquire(ifa, &psref);
  598                 pserialize_read_exit(ss);
  599 
  600                 rtinit(ifa, RTM_LLINFO_UPD, 0);
  601 
  602                 ss = pserialize_read_enter();
  603                 ifa_release(ifa, &psref);
  604         }
  605         pserialize_read_exit(ss);
  606 
  607         splx(s);
  608         curlwp_bindx(bound);
  609 }
  610 
  611 /*
  612  * Free the link level name for the specified interface.  This is
  613  * a detach helper.  This is called from if_detach().
  614  */
  615 void
  616 if_free_sadl(struct ifnet *ifp, int factory)
  617 {
  618         struct ifaddr *ifa;
  619 
  620         if (factory && ifp->if_hwdl != NULL) {
  621                 ifa = ifp->if_hwdl;
  622                 ifp->if_hwdl = NULL;
  623                 ifafree(ifa);
  624         }
  625 
  626         ifa = ifp->if_dl;
  627         if (ifa == NULL) {
  628                 KASSERT(ifp->if_sadl == NULL);
  629                 return;
  630         }
  631 
  632         KASSERT(ifp->if_sadl != NULL);
  633 
  634         const int s = splsoftnet();
  635         KASSERT(ifa->ifa_addr->sa_family == AF_LINK);
  636         ifa_remove(ifp, ifa);
  637         if_deactivate_sadl(ifp);
  638         splx(s);
  639 }
  640 
  641 static void
  642 if_getindex(ifnet_t *ifp)
  643 {
  644         bool hitlimit = false;
  645         char xnamebuf[HOOKNAMSIZ];
  646 
  647         ifp->if_index_gen = index_gen++;
  648         snprintf(xnamebuf, sizeof(xnamebuf), "%s-lshk", ifp->if_xname);
  649         ifp->if_linkstate_hooks = simplehook_create(IPL_NET,
  650             xnamebuf);
  651 
  652         ifp->if_index = if_index;
  653         if (ifindex2ifnet == NULL) {
  654                 if_index++;
  655                 goto skip;
  656         }
  657         while (if_byindex(ifp->if_index)) {
  658                 /*
  659                  * If we hit USHRT_MAX, we skip back to 0 since
  660                  * there are a number of places where the value
  661                  * of if_index or if_index itself is compared
  662                  * to or stored in an unsigned short.  By
  663                  * jumping back, we won't botch those assignments
  664                  * or comparisons.
  665                  */
  666                 if (++if_index == 0) {
  667                         if_index = 1;
  668                 } else if (if_index == USHRT_MAX) {
  669                         /*
  670                          * However, if we have to jump back to
  671                          * zero *twice* without finding an empty
  672                          * slot in ifindex2ifnet[], then
  673                          * there are too many (>65535) interfaces.
  674                          */
  675                         if (hitlimit)
  676                                 panic("too many interfaces");
  677                         hitlimit = true;
  678                         if_index = 1;
  679                 }
  680                 ifp->if_index = if_index;
  681         }
  682 skip:
  683         /*
  684          * ifindex2ifnet is indexed by if_index.  Since if_index can
  685          * grow dynamically, ifindex2ifnet must grow along with it.
  686          */
  687         if (ifindex2ifnet == NULL || ifp->if_index >= if_indexlim) {
  688                 size_t m, n, oldlim;
  689                 void *q;
  690 
  691                 oldlim = if_indexlim;
  692                 while (ifp->if_index >= if_indexlim)
  693                         if_indexlim <<= 1;
  694 
  695                 /* grow ifindex2ifnet */
  696                 m = oldlim * sizeof(struct ifnet *);
  697                 n = if_indexlim * sizeof(struct ifnet *);
  698                 q = malloc(n, M_IFADDR, M_WAITOK | M_ZERO);
  699                 if (ifindex2ifnet != NULL) {
  700                         memcpy(q, ifindex2ifnet, m);
  701                         free(ifindex2ifnet, M_IFADDR);
  702                 }
  703                 ifindex2ifnet = (struct ifnet **)q;
  704         }
  705         ifindex2ifnet[ifp->if_index] = ifp;
  706 }
  707 
  708 /*
  709  * Initialize an interface and assign an index for it.
  710  *
  711  * It must be called prior to a device specific attach routine
  712  * (e.g., ether_ifattach and ieee80211_ifattach) or if_alloc_sadl,
  713  * and be followed by if_register:
  714  *
  715  *     if_initialize(ifp);
  716  *     ether_ifattach(ifp, enaddr);
  717  *     if_register(ifp);
  718  */
  719 void
  720 if_initialize(ifnet_t *ifp)
  721 {
  722 
  723         KASSERT(if_indexlim > 0);
  724         TAILQ_INIT(&ifp->if_addrlist);
  725 
  726         /*
  727          * Link level name is allocated later by a separate call to
  728          * if_alloc_sadl().
  729          */
  730 
  731         if (ifp->if_snd.ifq_maxlen == 0)
  732                 ifp->if_snd.ifq_maxlen = ifqmaxlen;
  733 
  734         ifp->if_broadcastaddr = 0; /* reliably crash if used uninitialized */
  735 
  736         ifp->if_link_state = LINK_STATE_UNKNOWN;
  737         ifp->if_link_queue = -1; /* all bits set, see link_state_change() */
  738         ifp->if_link_scheduled = false;
  739 
  740         ifp->if_capenable = 0;
  741         ifp->if_csum_flags_tx = 0;
  742         ifp->if_csum_flags_rx = 0;
  743 
  744 #ifdef ALTQ
  745         ifp->if_snd.altq_type = 0;
  746         ifp->if_snd.altq_disc = NULL;
  747         ifp->if_snd.altq_flags &= ALTQF_CANTCHANGE;
  748         ifp->if_snd.altq_tbr  = NULL;
  749         ifp->if_snd.altq_ifp  = ifp;
  750 #endif
  751 
  752         IFQ_LOCK_INIT(&ifp->if_snd);
  753 
  754         ifp->if_pfil = pfil_head_create(PFIL_TYPE_IFNET, ifp);
  755         pfil_run_ifhooks(if_pfil, PFIL_IFNET_ATTACH, ifp);
  756 
  757         IF_AFDATA_LOCK_INIT(ifp);
  758 
  759         PSLIST_ENTRY_INIT(ifp, if_pslist_entry);
  760         PSLIST_INIT(&ifp->if_addr_pslist);
  761         psref_target_init(&ifp->if_psref, ifnet_psref_class);
  762         ifp->if_ioctl_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
  763         LIST_INIT(&ifp->if_multiaddrs);
  764         if_stats_init(ifp);
  765 
  766         IFNET_GLOBAL_LOCK();
  767         if_getindex(ifp);
  768         IFNET_GLOBAL_UNLOCK();
  769 }
  770 
  771 /*
  772  * Register an interface to the list of "active" interfaces.
  773  */
  774 void
  775 if_register(ifnet_t *ifp)
  776 {
  777         /*
  778          * If the driver has not supplied its own if_ioctl or if_stop,
  779          * then supply the default.
  780          */
  781         if (ifp->if_ioctl == NULL)
  782                 ifp->if_ioctl = ifioctl_common;
  783         if (ifp->if_stop == NULL)
  784                 ifp->if_stop = if_nullstop;
  785 
  786         sysctl_sndq_setup(&ifp->if_sysctl_log, ifp->if_xname, &ifp->if_snd);
  787 
  788         if (!STAILQ_EMPTY(&domains))
  789                 if_attachdomain1(ifp);
  790 
  791         /* Announce the interface. */
  792         rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
  793 
  794         if (ifp->if_slowtimo != NULL) {
  795                 struct if_slowtimo_data *isd;
  796 
  797                 isd = kmem_zalloc(sizeof(*isd), KM_SLEEP);
  798                 mutex_init(&isd->isd_lock, MUTEX_DEFAULT, IPL_SOFTCLOCK);
  799                 callout_init(&isd->isd_ch, CALLOUT_MPSAFE);
  800                 callout_setfunc(&isd->isd_ch, if_slowtimo_intr, ifp);
  801                 isd->isd_ifp = ifp;
  802 
  803                 ifp->if_slowtimo_data = isd;
  804 
  805                 if_slowtimo_intr(ifp);
  806 
  807                 sysctl_watchdog_setup(ifp);
  808         }
  809 
  810         if (ifp->if_transmit == NULL || ifp->if_transmit == if_nulltransmit)
  811                 ifp->if_transmit = if_transmit;
  812 
  813         IFNET_GLOBAL_LOCK();
  814         TAILQ_INSERT_TAIL(&ifnet_list, ifp, if_list);
  815         IFNET_WRITER_INSERT_TAIL(ifp);
  816         IFNET_GLOBAL_UNLOCK();
  817 }
  818 
  819 /*
  820  * The if_percpuq framework
  821  *
  822  * It allows network device drivers to execute the network stack
  823  * in softint (so called softint-based if_input). It utilizes
  824  * softint and percpu ifqueue. It doesn't distribute any packets
  825  * between CPUs, unlike pktqueue(9).
  826  *
  827  * Currently we support two options for device drivers to apply the framework:
  828  * - Use it implicitly with fewer changes
  829  *   - If you use if_attach in driver's _attach function and if_input in
  830  *     driver's Rx interrupt handler, a packet is queued and a softint handles
  831  *     the packet implicitly
  832  * - Use it explicitly in each driver (recommended)
  833  *   - You can use if_percpuq_* directly in your driver
  834  *   - In this case, you need to allocate struct if_percpuq in driver's softc
  835  *   - See wm(4) as a reference implementation
  836  */
  837 
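/*
 * A minimal sketch of the explicit (recommended) usage, assuming a
 * hypothetical driver xx(4) that keeps its if_percpuq pointer in a
 * softc member sc_ipq; only the if_initialize/if_register and
 * if_percpuq_* calls are APIs defined in this file:
 *
 *     xx_attach():
 *             if_initialize(ifp);
 *             sc->sc_ipq = if_percpuq_create(ifp);
 *             if_register(ifp);
 *
 *     xx_rxintr():                             for each received mbuf m
 *             if_percpuq_enqueue(sc->sc_ipq, m);
 *
 *     xx_detach():
 *             if_percpuq_destroy(sc->sc_ipq);
 *             sc->sc_ipq = NULL;
 */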
  838 static void
  839 if_percpuq_softint(void *arg)
  840 {
  841         struct if_percpuq *ipq = arg;
  842         struct ifnet *ifp = ipq->ipq_ifp;
  843         struct mbuf *m;
  844 
  845         while ((m = if_percpuq_dequeue(ipq)) != NULL) {
  846                 if_statinc(ifp, if_ipackets);
  847                 bpf_mtap(ifp, m, BPF_D_IN);
  848 
  849                 ifp->_if_input(ifp, m);
  850         }
  851 }
  852 
  853 static void
  854 if_percpuq_init_ifq(void *p, void *arg __unused, struct cpu_info *ci __unused)
  855 {
  856         struct ifqueue *const ifq = p;
  857 
  858         memset(ifq, 0, sizeof(*ifq));
  859         ifq->ifq_maxlen = IFQ_MAXLEN;
  860 }
  861 
  862 struct if_percpuq *
  863 if_percpuq_create(struct ifnet *ifp)
  864 {
  865         struct if_percpuq *ipq;
  866         u_int flags = SOFTINT_NET;
  867 
  868         flags |= if_is_mpsafe(ifp) ? SOFTINT_MPSAFE : 0;
  869 
  870         ipq = kmem_zalloc(sizeof(*ipq), KM_SLEEP);
  871         ipq->ipq_ifp = ifp;
  872         ipq->ipq_si = softint_establish(flags, if_percpuq_softint, ipq);
  873         ipq->ipq_ifqs = percpu_alloc(sizeof(struct ifqueue));
  874         percpu_foreach(ipq->ipq_ifqs, &if_percpuq_init_ifq, NULL);
  875 
  876         sysctl_percpuq_setup(&ifp->if_sysctl_log, ifp->if_xname, ipq);
  877 
  878         return ipq;
  879 }
  880 
  881 static struct mbuf *
  882 if_percpuq_dequeue(struct if_percpuq *ipq)
  883 {
  884         struct mbuf *m;
  885         struct ifqueue *ifq;
  886 
  887         const int s = splnet();
  888         ifq = percpu_getref(ipq->ipq_ifqs);
  889         IF_DEQUEUE(ifq, m);
  890         percpu_putref(ipq->ipq_ifqs);
  891         splx(s);
  892 
  893         return m;
  894 }
  895 
  896 static void
  897 if_percpuq_purge_ifq(void *p, void *arg __unused, struct cpu_info *ci __unused)
  898 {
  899         struct ifqueue *const ifq = p;
  900 
  901         IF_PURGE(ifq);
  902 }
  903 
  904 void
  905 if_percpuq_destroy(struct if_percpuq *ipq)
  906 {
  907 
  908         /* if_detach may already destroy it */
  909         if (ipq == NULL)
  910                 return;
  911 
  912         softint_disestablish(ipq->ipq_si);
  913         percpu_foreach(ipq->ipq_ifqs, &if_percpuq_purge_ifq, NULL);
  914         percpu_free(ipq->ipq_ifqs, sizeof(struct ifqueue));
  915         kmem_free(ipq, sizeof(*ipq));
  916 }
  917 
  918 void
  919 if_percpuq_enqueue(struct if_percpuq *ipq, struct mbuf *m)
  920 {
  921         struct ifqueue *ifq;
  922 
  923         KASSERT(ipq != NULL);
  924 
  925         const int s = splnet();
  926         ifq = percpu_getref(ipq->ipq_ifqs);
  927         if (IF_QFULL(ifq)) {
  928                 IF_DROP(ifq);
  929                 percpu_putref(ipq->ipq_ifqs);
  930                 m_freem(m);
  931                 goto out;
  932         }
  933         IF_ENQUEUE(ifq, m);
  934         percpu_putref(ipq->ipq_ifqs);
  935 
  936         softint_schedule(ipq->ipq_si);
  937 out:
  938         splx(s);
  939 }
  940 
  941 static void
  942 if_percpuq_drops(void *p, void *arg, struct cpu_info *ci __unused)
  943 {
  944         struct ifqueue *const ifq = p;
  945         uint64_t *sum = arg;
  946 
  947         *sum += ifq->ifq_drops;
  948 }
  949 
  950 static int
  951 sysctl_percpuq_drops_handler(SYSCTLFN_ARGS)
  952 {
  953         struct sysctlnode node;
  954         struct if_percpuq *ipq;
  955         uint64_t sum = 0;
  956         int error;
  957 
  958         node = *rnode;
  959         ipq = node.sysctl_data;
  960 
  961         percpu_foreach(ipq->ipq_ifqs, if_percpuq_drops, &sum);
  962 
  963         node.sysctl_data = &sum;
  964         error = sysctl_lookup(SYSCTLFN_CALL(&node));
  965         if (error != 0 || newp == NULL)
  966                 return error;
  967 
  968         return 0;
  969 }
  970 
  971 static void
  972 sysctl_percpuq_setup(struct sysctllog **clog, const char* ifname,
  973     struct if_percpuq *ipq)
  974 {
  975         const struct sysctlnode *cnode, *rnode;
  976 
  977         if (sysctl_createv(clog, 0, NULL, &rnode,
  978                        CTLFLAG_PERMANENT,
  979                        CTLTYPE_NODE, "interfaces",
  980                        SYSCTL_DESCR("Per-interface controls"),
  981                        NULL, 0, NULL, 0,
  982                        CTL_NET, CTL_CREATE, CTL_EOL) != 0)
  983                 goto bad;
  984 
  985         if (sysctl_createv(clog, 0, &rnode, &rnode,
  986                        CTLFLAG_PERMANENT,
  987                        CTLTYPE_NODE, ifname,
  988                        SYSCTL_DESCR("Interface controls"),
  989                        NULL, 0, NULL, 0,
  990                        CTL_CREATE, CTL_EOL) != 0)
  991                 goto bad;
  992 
  993         if (sysctl_createv(clog, 0, &rnode, &rnode,
  994                        CTLFLAG_PERMANENT,
  995                        CTLTYPE_NODE, "rcvq",
  996                        SYSCTL_DESCR("Interface input queue controls"),
  997                        NULL, 0, NULL, 0,
  998                        CTL_CREATE, CTL_EOL) != 0)
  999                 goto bad;
 1000 
 1001 #ifdef NOTYET
 1002         /* XXX Should show each per-CPU queue length? */
 1003         if (sysctl_createv(clog, 0, &rnode, &rnode,
 1004                        CTLFLAG_PERMANENT,
 1005                        CTLTYPE_INT, "len",
 1006                        SYSCTL_DESCR("Current input queue length"),
 1007                        sysctl_percpuq_len, 0, NULL, 0,
 1008                        CTL_CREATE, CTL_EOL) != 0)
 1009                 goto bad;
 1010 
 1011         if (sysctl_createv(clog, 0, &rnode, &cnode,
 1012                        CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
 1013                        CTLTYPE_INT, "maxlen",
 1014                        SYSCTL_DESCR("Maximum allowed input queue length"),
 1015                        sysctl_percpuq_maxlen_handler, 0, (void *)ipq, 0,
 1016                        CTL_CREATE, CTL_EOL) != 0)
 1017                 goto bad;
 1018 #endif
 1019 
 1020         if (sysctl_createv(clog, 0, &rnode, &cnode,
 1021                        CTLFLAG_PERMANENT,
 1022                        CTLTYPE_QUAD, "drops",
 1023                        SYSCTL_DESCR("Total packets dropped due to full input queue"),
 1024                        sysctl_percpuq_drops_handler, 0, (void *)ipq, 0,
 1025                        CTL_CREATE, CTL_EOL) != 0)
 1026                 goto bad;
 1027 
 1028         return;
 1029 bad:
 1030         printf("%s: could not attach sysctl nodes\n", ifname);
 1031         return;
 1032 }
 1033 
 1034 /*
 1035  * The deferred if_start framework
 1036  *
 1037  * The common APIs to defer if_start to softint when if_start is requested
 1038  * from a device driver running in hardware interrupt context.
 1039  */
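/*
 * A minimal sketch of how a driver might use the framework, assuming a
 * hypothetical xx(4) driver; if_deferred_start_init() and
 * if_schedule_deferred_start() are the APIs defined below:
 *
 *     xx_attach():
 *             ...
 *             if_deferred_start_init(ifp, NULL);      NULL selects the
 *                                                     default callback
 *
 *     xx_txintr():                                    hardware interrupt
 *             ...                                     reclaim finished Tx slots
 *             if_schedule_deferred_start(ifp);        run if_start in softint
 */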
 1040 /*
 1041  * Call ifp->if_start (or equivalent) in a dedicated softint for
 1042  * deferred if_start.
 1043  */
 1044 static void
 1045 if_deferred_start_softint(void *arg)
 1046 {
 1047         struct if_deferred_start *ids = arg;
 1048         struct ifnet *ifp = ids->ids_ifp;
 1049 
 1050         ids->ids_if_start(ifp);
 1051 }
 1052 
 1053 /*
 1054  * The default callback function for deferred if_start.
 1055  */
 1056 static void
 1057 if_deferred_start_common(struct ifnet *ifp)
 1058 {
 1059         const int s = splnet();
 1060         if_start_lock(ifp);
 1061         splx(s);
 1062 }
 1063 
 1064 static inline bool
 1065 if_snd_is_used(struct ifnet *ifp)
 1066 {
 1067 
 1068         return ALTQ_IS_ENABLED(&ifp->if_snd) ||
 1069             ifp->if_transmit == if_transmit ||
 1070             ifp->if_transmit == NULL ||
 1071             ifp->if_transmit == if_nulltransmit;
 1072 }
 1073 
 1074 /*
 1075  * Schedule deferred if_start.
 1076  */
 1077 void
 1078 if_schedule_deferred_start(struct ifnet *ifp)
 1079 {
 1080 
 1081         KASSERT(ifp->if_deferred_start != NULL);
 1082 
 1083         if (if_snd_is_used(ifp) && IFQ_IS_EMPTY(&ifp->if_snd))
 1084                 return;
 1085 
 1086         softint_schedule(ifp->if_deferred_start->ids_si);
 1087 }
 1088 
 1089 /*
 1090  * Create an instance of deferred if_start. A driver should call the function
 1091  * only if the driver needs deferred if_start. Drivers can set up their own
 1092  * deferred if_start function via the 2nd argument.
 1093  */
 1094 void
 1095 if_deferred_start_init(struct ifnet *ifp, void (*func)(struct ifnet *))
 1096 {
 1097         struct if_deferred_start *ids;
 1098         u_int flags = SOFTINT_NET;
 1099 
 1100         flags |= if_is_mpsafe(ifp) ? SOFTINT_MPSAFE : 0;
 1101 
 1102         ids = kmem_zalloc(sizeof(*ids), KM_SLEEP);
 1103         ids->ids_ifp = ifp;
 1104         ids->ids_si = softint_establish(flags, if_deferred_start_softint, ids);
 1105         if (func != NULL)
 1106                 ids->ids_if_start = func;
 1107         else
 1108                 ids->ids_if_start = if_deferred_start_common;
 1109 
 1110         ifp->if_deferred_start = ids;
 1111 }
 1112 
 1113 static void
 1114 if_deferred_start_destroy(struct ifnet *ifp)
 1115 {
 1116 
 1117         if (ifp->if_deferred_start == NULL)
 1118                 return;
 1119 
 1120         softint_disestablish(ifp->if_deferred_start->ids_si);
 1121         kmem_free(ifp->if_deferred_start, sizeof(*ifp->if_deferred_start));
 1122         ifp->if_deferred_start = NULL;
 1123 }
 1124 
 1125 /*
 1126  * The common interface input routine that is called by device drivers,
 1127  * which should be used only when the driver's rx handler already runs
 1128  * in softint.
 1129  */
 1130 void
 1131 if_input(struct ifnet *ifp, struct mbuf *m)
 1132 {
 1133 
 1134         KASSERT(ifp->if_percpuq == NULL);
 1135         KASSERT(!cpu_intr_p());
 1136 
 1137         if_statinc(ifp, if_ipackets);
 1138         bpf_mtap(ifp, m, BPF_D_IN);
 1139 
 1140         ifp->_if_input(ifp, m);
 1141 }
 1142 
 1143 /*
 1144  * DEPRECATED. Use if_initialize and if_register instead.
 1145  * See the above comment of if_initialize.
 1146  *
 1147  * Note that it implicitly enables if_percpuq so that drivers can migrate to
 1148  * softint-based if_input without many changes. If you don't
 1149  * want to enable it, use if_initialize instead.
 1150  */
 1151 void
 1152 if_attach(ifnet_t *ifp)
 1153 {
 1154 
 1155         if_initialize(ifp);
 1156         ifp->if_percpuq = if_percpuq_create(ifp);
 1157         if_register(ifp);
 1158 }
 1159 
 1160 void
 1161 if_attachdomain(void)
 1162 {
 1163         struct ifnet *ifp;
 1164         const int bound = curlwp_bind();
 1165 
 1166         int s = pserialize_read_enter();
 1167         IFNET_READER_FOREACH(ifp) {
 1168                 struct psref psref;
 1169                 psref_acquire(&psref, &ifp->if_psref, ifnet_psref_class);
 1170                 pserialize_read_exit(s);
 1171                 if_attachdomain1(ifp);
 1172                 s = pserialize_read_enter();
 1173                 psref_release(&psref, &ifp->if_psref, ifnet_psref_class);
 1174         }
 1175         pserialize_read_exit(s);
 1176         curlwp_bindx(bound);
 1177 }
 1178 
 1179 static void
 1180 if_attachdomain1(struct ifnet *ifp)
 1181 {
 1182         struct domain *dp;
 1183         const int s = splsoftnet();
 1184 
 1185         /* address family dependent data region */
 1186         memset(ifp->if_afdata, 0, sizeof(ifp->if_afdata));
 1187         DOMAIN_FOREACH(dp) {
 1188                 if (dp->dom_ifattach != NULL)
 1189                         ifp->if_afdata[dp->dom_family] =
 1190                             (*dp->dom_ifattach)(ifp);
 1191         }
 1192 
 1193         splx(s);
 1194 }
 1195 
 1196 /*
 1197  * Deactivate an interface.  This points all of the procedure
 1198  * handles at error stubs.  May be called from interrupt context.
 1199  */
 1200 void
 1201 if_deactivate(struct ifnet *ifp)
 1202 {
 1203         const int s = splsoftnet();
 1204 
 1205         ifp->if_output   = if_nulloutput;
 1206         ifp->_if_input   = if_nullinput;
 1207         ifp->if_start    = if_nullstart;
 1208         ifp->if_transmit = if_nulltransmit;
 1209         ifp->if_ioctl    = if_nullioctl;
 1210         ifp->if_init     = if_nullinit;
 1211         ifp->if_stop     = if_nullstop;
 1212         if (ifp->if_slowtimo)
 1213                 ifp->if_slowtimo = if_nullslowtimo;
 1214         ifp->if_drain    = if_nulldrain;
 1215 
 1216         /* No more packets may be enqueued. */
 1217         ifp->if_snd.ifq_maxlen = 0;
 1218 
 1219         splx(s);
 1220 }
 1221 
 1222 bool
 1223 if_is_deactivated(const struct ifnet *ifp)
 1224 {
 1225 
 1226         return ifp->if_output == if_nulloutput;
 1227 }
 1228 
 1229 void
 1230 if_purgeaddrs(struct ifnet *ifp, int family,
 1231     void (*purgeaddr)(struct ifaddr *))
 1232 {
 1233         struct ifaddr *ifa, *nifa;
 1234         int s;
 1235 
 1236         s = pserialize_read_enter();
 1237         for (ifa = IFADDR_READER_FIRST(ifp); ifa; ifa = nifa) {
 1238                 nifa = IFADDR_READER_NEXT(ifa);
 1239                 if (ifa->ifa_addr->sa_family != family)
 1240                         continue;
 1241                 pserialize_read_exit(s);
 1242 
 1243                 (*purgeaddr)(ifa);
 1244 
 1245                 s = pserialize_read_enter();
 1246         }
 1247         pserialize_read_exit(s);
 1248 }
 1249 
 1250 #ifdef IFAREF_DEBUG
 1251 static struct ifaddr **ifa_list;
 1252 static int ifa_list_size;
 1253 
 1254 /* Depends on only one if_attach running at a time */
 1255 static void
 1256 if_build_ifa_list(struct ifnet *ifp)
 1257 {
 1258         struct ifaddr *ifa;
 1259         int i;
 1260 
 1261         KASSERT(ifa_list == NULL);
 1262         KASSERT(ifa_list_size == 0);
 1263 
 1264         IFADDR_READER_FOREACH(ifa, ifp)
 1265                 ifa_list_size++;
 1266 
 1267         ifa_list = kmem_alloc(sizeof(*ifa) * ifa_list_size, KM_SLEEP);
 1268         i = 0;
 1269         IFADDR_READER_FOREACH(ifa, ifp) {
 1270                 ifa_list[i++] = ifa;
 1271                 ifaref(ifa);
 1272         }
 1273 }
 1274 
 1275 static void
 1276 if_check_and_free_ifa_list(struct ifnet *ifp)
 1277 {
 1278         int i;
 1279         struct ifaddr *ifa;
 1280 
 1281         if (ifa_list == NULL)
 1282                 return;
 1283 
 1284         for (i = 0; i < ifa_list_size; i++) {
 1285                 char buf[64];
 1286 
 1287                 ifa = ifa_list[i];
 1288                 sockaddr_format(ifa->ifa_addr, buf, sizeof(buf));
 1289                 if (ifa->ifa_refcnt > 1) {
 1290                         log(LOG_WARNING,
 1291                             "ifa(%s) still referenced (refcnt=%d)\n",
 1292                             buf, ifa->ifa_refcnt - 1);
 1293                 } else
 1294                         log(LOG_DEBUG,
 1295                             "ifa(%s) not referenced (refcnt=%d)\n",
 1296                             buf, ifa->ifa_refcnt - 1);
 1297                 ifafree(ifa);
 1298         }
 1299 
 1300         kmem_free(ifa_list, sizeof(*ifa) * ifa_list_size);
 1301         ifa_list = NULL;
 1302         ifa_list_size = 0;
 1303 }
 1304 #endif
 1305 
 1306 /*
 1307  * Detach an interface from the list of "active" interfaces,
 1308  * freeing any resources as we go along.
 1309  *
 1310  * NOTE: This routine must be called with a valid thread context,
 1311  * as it may block.
 1312  */
 1313 void
 1314 if_detach(struct ifnet *ifp)
 1315 {
 1316         struct socket so;
 1317         struct ifaddr *ifa;
 1318 #ifdef IFAREF_DEBUG
 1319         struct ifaddr *last_ifa = NULL;
 1320 #endif
 1321         struct domain *dp;
 1322         const struct protosw *pr;
 1323         int i, family, purged;
 1324 
 1325 #ifdef IFAREF_DEBUG
 1326         if_build_ifa_list(ifp);
 1327 #endif
 1328         /*
 1329          * XXX It's kind of lame that we have to have the
 1330          * XXX socket structure...
 1331          */
 1332         memset(&so, 0, sizeof(so));
 1333 
 1334         const int s = splnet();
 1335 
 1336         sysctl_teardown(&ifp->if_sysctl_log);
 1337 
 1338         IFNET_LOCK(ifp);
 1339 
 1340         /*
 1341          * Unset all queued link states and pretend a
 1342          * link state change is scheduled.
 1343          * This stops any more link state changes occurring for this
 1344          * interface while it's being detached so it's safe
 1345          * to drain the workqueue.
 1346          */
 1347         IF_LINK_STATE_CHANGE_LOCK(ifp);
 1348         ifp->if_link_queue = -1; /* all bits set, see link_state_change() */
 1349         ifp->if_link_scheduled = true;
 1350         IF_LINK_STATE_CHANGE_UNLOCK(ifp);
 1351         workqueue_wait(ifnet_link_state_wq, &ifp->if_link_work);
 1352 
 1353         if_deactivate(ifp);
 1354         IFNET_UNLOCK(ifp);
 1355 
 1356         /*
 1357          * Unlink from the list and wait for all readers to leave
 1358          * from pserialize read sections.  Note that we can't do
 1359          * psref_target_destroy here.  See below.
 1360          */
 1361         IFNET_GLOBAL_LOCK();
 1362         ifindex2ifnet[ifp->if_index] = NULL;
 1363         TAILQ_REMOVE(&ifnet_list, ifp, if_list);
 1364         IFNET_WRITER_REMOVE(ifp);
 1365         pserialize_perform(ifnet_psz);
 1366         IFNET_GLOBAL_UNLOCK();
 1367 
 1368         if (ifp->if_slowtimo != NULL) {
 1369                 struct if_slowtimo_data *isd = ifp->if_slowtimo_data;
 1370 
 1371                 mutex_enter(&isd->isd_lock);
 1372                 isd->isd_dying = true;
 1373                 mutex_exit(&isd->isd_lock);
 1374                 callout_halt(&isd->isd_ch, NULL);
 1375                 workqueue_wait(if_slowtimo_wq, &isd->isd_work);
 1376                 callout_destroy(&isd->isd_ch);
 1377                 mutex_destroy(&isd->isd_lock);
 1378                 kmem_free(isd, sizeof(*isd));
 1379 
 1380                 ifp->if_slowtimo_data = NULL; /* paranoia */
 1381                 ifp->if_slowtimo = NULL;      /* paranoia */
 1382         }
 1383         if_deferred_start_destroy(ifp);
 1384 
 1385         /*
 1386          * Do an if_down() to give protocols a chance to do something.
 1387          */
 1388         if_down_deactivated(ifp);
 1389 
 1390 #ifdef ALTQ
 1391         if (ALTQ_IS_ENABLED(&ifp->if_snd))
 1392                 altq_disable(&ifp->if_snd);
 1393         if (ALTQ_IS_ATTACHED(&ifp->if_snd))
 1394                 altq_detach(&ifp->if_snd);
 1395 #endif
 1396 
 1397 #if NCARP > 0
 1398         /* Remove the interface from any carp group it is a part of.  */
 1399         if (ifp->if_carp != NULL && ifp->if_type != IFT_CARP)
 1400                 carp_ifdetach(ifp);
 1401 #endif
 1402 
 1403         /*
 1404          * Ensure that all packets on protocol input pktqueues have been
 1405          * processed, or, at least, removed from the queues.
 1406          *
 1407          * A cross-call will ensure that the interrupts have completed.
 1408          * FIXME: not quite..
 1409          */
 1410         pktq_ifdetach();
 1411         xc_barrier(0);
 1412 
 1413         /*
 1414          * Rip all the addresses off the interface.  This should make
 1415          * all of the routes go away.
 1416          *
 1417          * pr_usrreq calls can remove an arbitrary number of ifaddrs
 1418          * from the list, including our "cursor", ifa.  For safety,
 1419          * and to honor the TAILQ abstraction, I just restart the
 1420          * loop after each removal.  Note that the loop will exit
 1421          * when all of the remaining ifaddrs belong to the AF_LINK
 1422          * family.  I am counting on the historical fact that at
 1423          * least one pr_usrreq in each address domain removes at
 1424          * least one ifaddr.
 1425          */
 1426 again:
 1427         /*
 1428          * At this point, no other one tries to remove ifa in the list,
 1429          * so we don't need to take a lock or psref.  Avoid using
 1430          * IFADDR_READER_FOREACH here so as not to trip pserialize's
 1431          * contract-violation checks.
 1432          */
 1433         IFADDR_WRITER_FOREACH(ifa, ifp) {
 1434                 family = ifa->ifa_addr->sa_family;
 1435 #ifdef IFAREF_DEBUG
 1436                 printf("if_detach: ifaddr %p, family %d, refcnt %d\n",
 1437                     ifa, family, ifa->ifa_refcnt);
 1438                 if (last_ifa != NULL && ifa == last_ifa)
 1439                         panic("if_detach: loop detected");
 1440                 last_ifa = ifa;
 1441 #endif
 1442                 if (family == AF_LINK)
 1443                         continue;
 1444                 dp = pffinddomain(family);
 1445                 KASSERTMSG(dp != NULL, "no domain for AF %d", family);
 1446                 /*
 1447                  * XXX These PURGEIF calls are redundant with the
 1448                  * purge-all-families calls below, but are left in for
 1449                  * now both to make a smaller change, and to avoid
 1450                  * unplanned interactions with clearing of
 1451                  * ifp->if_addrlist.
 1452                  */
 1453                 purged = 0;
 1454                 for (pr = dp->dom_protosw;
 1455                      pr < dp->dom_protoswNPROTOSW; pr++) {
 1456                         so.so_proto = pr;
 1457                         if (pr->pr_usrreqs) {
 1458                                 (void) (*pr->pr_usrreqs->pr_purgeif)(&so, ifp);
 1459                                 purged = 1;
 1460                         }
 1461                 }
 1462                 if (purged == 0) {
 1463                         /*
 1464                          * XXX What's really the best thing to do
 1465                          * XXX here?  --thorpej@NetBSD.org
 1466                          */
 1467                         printf("if_detach: WARNING: AF %d not purged\n",
 1468                             family);
 1469                         ifa_remove(ifp, ifa);
 1470                 }
 1471                 goto again;
 1472         }
 1473 
 1474         if_free_sadl(ifp, 1);
 1475 
 1476 restart:
 1477         IFADDR_WRITER_FOREACH(ifa, ifp) {
 1478                 family = ifa->ifa_addr->sa_family;
 1479                 KASSERT(family == AF_LINK);
 1480                 ifa_remove(ifp, ifa);
 1481                 goto restart;
 1482         }
 1483 
 1484         /* Delete stray routes from the routing table. */
 1485         for (i = 0; i <= AF_MAX; i++)
 1486                 rt_delete_matched_entries(i, if_delroute_matcher, ifp, false);
 1487 
 1488         DOMAIN_FOREACH(dp) {
 1489                 if (dp->dom_ifdetach != NULL && ifp->if_afdata[dp->dom_family])
 1490                 {
 1491                         void *p = ifp->if_afdata[dp->dom_family];
 1492                         if (p) {
 1493                                 ifp->if_afdata[dp->dom_family] = NULL;
 1494                                 (*dp->dom_ifdetach)(ifp, p);
 1495                         }
 1496                 }
 1497 
 1498                 /*
 1499                  * One would expect multicast memberships (INET and
 1500                  * INET6) on UDP sockets to be purged by the PURGEIF
 1501                  * calls above, but if all addresses were removed from
 1502                  * the interface prior to destruction, the calls will
 1503                  * not be made (e.g. ppp, for which pppd(8) generally
 1504                  * removes addresses before destroying the interface).
 1505                  * Because there is no invariant that multicast
 1506                  * memberships only exist for interfaces with IPv4
 1507                  * addresses, we must call PURGEIF regardless of
 1508                  * addresses.  (Protocols which might store ifnet
 1509                  * pointers are marked with PR_PURGEIF.)
 1510                  */
 1511                 for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
 1512                 {
 1513                         so.so_proto = pr;
 1514                         if (pr->pr_usrreqs && pr->pr_flags & PR_PURGEIF)
 1515                                 (void)(*pr->pr_usrreqs->pr_purgeif)(&so, ifp);
 1516                 }
 1517         }
 1518 
 1519         /*
 1520          * Must be done after the above pr_purgeif because if_psref may be
 1521          * still used in pr_purgeif.
 1522          */
 1523         psref_target_destroy(&ifp->if_psref, ifnet_psref_class);
 1524         PSLIST_ENTRY_DESTROY(ifp, if_pslist_entry);
 1525 
 1526         pfil_run_ifhooks(if_pfil, PFIL_IFNET_DETACH, ifp);
 1527         (void)pfil_head_destroy(ifp->if_pfil);
 1528 
 1529         /* Announce that the interface is gone. */
 1530         rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
 1531 
 1532         IF_AFDATA_LOCK_DESTROY(ifp);
 1533 
 1534         if (ifp->if_percpuq != NULL) {
 1535                 if_percpuq_destroy(ifp->if_percpuq);
 1536                 ifp->if_percpuq = NULL;
 1537         }
 1538 
 1539         mutex_obj_free(ifp->if_ioctl_lock);
 1540         ifp->if_ioctl_lock = NULL;
 1541         mutex_obj_free(ifp->if_snd.ifq_lock);
 1542         if_stats_fini(ifp);
 1543         KASSERT(!simplehook_has_hooks(ifp->if_linkstate_hooks));
 1544         simplehook_destroy(ifp->if_linkstate_hooks);
 1545 
 1546         splx(s);
 1547 
 1548 #ifdef IFAREF_DEBUG
 1549         if_check_and_free_ifa_list(ifp);
 1550 #endif
 1551 }
 1552 
 1553 /*
 1554  * Callback for a radix tree walk to delete all references to an
 1555  * ifnet.
 1556  */
 1557 static int
 1558 if_delroute_matcher(struct rtentry *rt, void *v)
 1559 {
 1560         struct ifnet *ifp = (struct ifnet *)v;
 1561 
 1562         if (rt->rt_ifp == ifp)
 1563                 return 1;
 1564         else
 1565                 return 0;
 1566 }
 1567 
 1568 /*
 1569  * Create a clone network interface.
 1570  */
 1571 static int
 1572 if_clone_create(const char *name)
 1573 {
 1574         struct if_clone *ifc;
 1575         struct ifnet *ifp;
 1576         struct psref psref;
 1577         int unit;
 1578 
 1579         KASSERT(mutex_owned(&if_clone_mtx));
 1580 
 1581         ifc = if_clone_lookup(name, &unit);
 1582         if (ifc == NULL)
 1583                 return EINVAL;
 1584 
 1585         ifp = if_get(name, &psref);
 1586         if (ifp != NULL) {
 1587                 if_put(ifp, &psref);
 1588                 return EEXIST;
 1589         }
 1590 
 1591         return (*ifc->ifc_create)(ifc, unit);
 1592 }
 1593 
 1594 /*
 1595  * Destroy a clone network interface.
 1596  */
 1597 static int
 1598 if_clone_destroy(const char *name)
 1599 {
 1600         struct if_clone *ifc;
 1601         struct ifnet *ifp;
 1602         struct psref psref;
 1603         int error;
 1604         int (*if_ioctlfn)(struct ifnet *, u_long, void *);
 1605 
 1606         KASSERT(mutex_owned(&if_clone_mtx));
 1607 
 1608         ifc = if_clone_lookup(name, NULL);
 1609         if (ifc == NULL)
 1610                 return EINVAL;
 1611 
 1612         if (ifc->ifc_destroy == NULL)
 1613                 return EOPNOTSUPP;
 1614 
 1615         ifp = if_get(name, &psref);
 1616         if (ifp == NULL)
 1617                 return ENXIO;
 1618 
 1619         /* We have to disable ioctls here */
 1620         IFNET_LOCK(ifp);
 1621         if_ioctlfn = ifp->if_ioctl;
 1622         ifp->if_ioctl = if_nullioctl;
 1623         IFNET_UNLOCK(ifp);
 1624 
 1625         /*
 1626          * We cannot call ifc_destroy while holding a reference to ifp.
 1627          * Releasing ifp here is safe thanks to if_clone_mtx.
 1628          */
 1629         if_put(ifp, &psref);
 1630 
 1631         error = (*ifc->ifc_destroy)(ifp);
 1632 
 1633         if (error != 0) {
 1634                 /* We have to restore if_ioctl on error */
 1635                 IFNET_LOCK(ifp);
 1636                 ifp->if_ioctl = if_ioctlfn;
 1637                 IFNET_UNLOCK(ifp);
 1638         }
 1639 
 1640         return error;
 1641 }
 1642 
 1643 static bool
 1644 if_is_unit(const char *name)
 1645 {
 1646 
 1647         while (*name != '\0') {
 1648                 if (*name < '0' || *name > '9')
 1649                         return false;
 1650                 name++;
 1651         }
 1652 
 1653         return true;
 1654 }
 1655 
 1656 /*
 1657  * Look up a network interface cloner.
 1658  */
 1659 static struct if_clone *
 1660 if_clone_lookup(const char *name, int *unitp)
 1661 {
 1662         struct if_clone *ifc;
 1663         const char *cp;
 1664         char *dp, ifname[IFNAMSIZ + 3];
 1665         int unit;
 1666 
 1667         KASSERT(mutex_owned(&if_clone_mtx));
 1668 
 1669         strcpy(ifname, "if_");
 1670         /* Separate the interface name from the unit number. */
 1671         /* TODO: search for the unit number from the end of the string */
 1672         for (dp = ifname + 3, cp = name; cp - name < IFNAMSIZ &&
 1673             *cp && !if_is_unit(cp);)
 1674                 *dp++ = *cp++;
 1675 
 1676         if (cp == name || cp - name == IFNAMSIZ || !*cp)
 1677                 return NULL;    /* No name or unit number */
 1678         *dp++ = '\0';
 1679 
 1680 again:
 1681         LIST_FOREACH(ifc, &if_cloners, ifc_list) {
 1682                 if (strcmp(ifname + 3, ifc->ifc_name) == 0)
 1683                         break;
 1684         }
 1685 
 1686         if (ifc == NULL) {
 1687                 int error;
 1688                 if (*ifname == '\0')
 1689                         return NULL;
 1690                 mutex_exit(&if_clone_mtx);
 1691                 error = module_autoload(ifname, MODULE_CLASS_DRIVER);
 1692                 mutex_enter(&if_clone_mtx);
 1693                 if (error)
 1694                         return NULL;
 1695                 *ifname = '\0';
 1696                 goto again;
 1697         }
 1698 
 1699         unit = 0;
 1700         while (cp - name < IFNAMSIZ && *cp) {
 1701                 if (*cp < '0' || *cp > '9' || unit >= INT_MAX / 10) {
 1702                         /* Bogus unit number. */
 1703                         return NULL;
 1704                 }
 1705                 unit = (unit * 10) + (*cp++ - '0');
 1706         }
 1707 
 1708         if (unitp != NULL)
 1709                 *unitp = unit;
 1710         return ifc;
 1711 }
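
      /*
       * Worked example (illustrative): for the name "tap3" the copy loop in
       * if_clone_lookup() above builds ifname = "if_tap", leaves cp pointing
       * at "3", and the digit loop yields unit = 3.  If no "tap" cloner is
       * registered yet, module_autoload("if_tap", MODULE_CLASS_DRIVER) is
       * attempted once before the cloner list is rescanned.
       */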
 1712 
 1713 /*
 1714  * Register a network interface cloner.
 1715  */
 1716 void
 1717 if_clone_attach(struct if_clone *ifc)
 1718 {
 1719 
 1720         mutex_enter(&if_clone_mtx);
 1721         LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
 1722         if_cloners_count++;
 1723         mutex_exit(&if_clone_mtx);
 1724 }
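
      /*
       * Registration sketch (illustrative; "foo" and its callbacks are
       * hypothetical): a cloning driver typically wires itself up at attach
       * time with the usual IF_CLONE_INITIALIZER() idiom and calls
       * if_clone_detach() again on unload.
       *
       *      static struct if_clone foo_cloner =
       *          IF_CLONE_INITIALIZER("foo", foo_clone_create, foo_clone_destroy);
       *
       *      void
       *      fooattach(int n)
       *      {
       *
       *              if_clone_attach(&foo_cloner);
       *      }
       */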
 1725 
 1726 /*
 1727  * Unregister a network interface cloner.
 1728  */
 1729 void
 1730 if_clone_detach(struct if_clone *ifc)
 1731 {
 1732 
 1733         mutex_enter(&if_clone_mtx);
 1734         LIST_REMOVE(ifc, ifc_list);
 1735         if_cloners_count--;
 1736         mutex_exit(&if_clone_mtx);
 1737 }
 1738 
 1739 /*
 1740  * Provide list of interface cloners to userspace.
 1741  */
 1742 int
 1743 if_clone_list(int buf_count, char *buffer, int *total)
 1744 {
 1745         char outbuf[IFNAMSIZ], *dst;
 1746         struct if_clone *ifc;
 1747         int count, error = 0;
 1748 
 1749         mutex_enter(&if_clone_mtx);
 1750         *total = if_cloners_count;
 1751         if ((dst = buffer) == NULL) {
 1752                 /* Just asking how many there are. */
 1753                 goto out;
 1754         }
 1755 
 1756         if (buf_count < 0) {
 1757                 error = EINVAL;
 1758                 goto out;
 1759         }
 1760 
 1761         count = (if_cloners_count < buf_count) ? if_cloners_count : buf_count;
 1762 
 1763         for (ifc = LIST_FIRST(&if_cloners); ifc != NULL && count != 0;
 1764              ifc = LIST_NEXT(ifc, ifc_list), count--, dst += IFNAMSIZ) {
 1765                 (void)strncpy(outbuf, ifc->ifc_name, sizeof(outbuf));
 1766                 if (outbuf[sizeof(outbuf) - 1] != '\0') {
 1767                         error = ENAMETOOLONG;
 1768                         goto out;
 1769                 }
 1770                 error = copyout(outbuf, dst, sizeof(outbuf));
 1771                 if (error != 0)
 1772                         break;
 1773         }
 1774 
 1775 out:
 1776         mutex_exit(&if_clone_mtx);
 1777         return error;
 1778 }
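
      /*
       * Consumer sketch (illustrative; assumes the usual struct if_clonereq
       * layout): userland reaches this through the SIOCIFGCLONERS ioctl,
       * typically calling twice -- once with a NULL buffer to learn the
       * count, then again with count * IFNAMSIZ bytes of buffer.
       *
       *      struct if_clonereq ifcr;
       *
       *      memset(&ifcr, 0, sizeof(ifcr));
       *      ioctl(s, SIOCIFGCLONERS, &ifcr);        // learn ifcr_total
       *      ifcr.ifcr_count = ifcr.ifcr_total;
       *      ifcr.ifcr_buffer = malloc((size_t)ifcr.ifcr_count * IFNAMSIZ);
       *      ioctl(s, SIOCIFGCLONERS, &ifcr);        // copy out the names
       */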
 1779 
 1780 void
 1781 ifa_psref_init(struct ifaddr *ifa)
 1782 {
 1783 
 1784         psref_target_init(&ifa->ifa_psref, ifa_psref_class);
 1785 }
 1786 
 1787 void
 1788 ifaref(struct ifaddr *ifa)
 1789 {
 1790 
 1791         atomic_inc_uint(&ifa->ifa_refcnt);
 1792 }
 1793 
 1794 void
 1795 ifafree(struct ifaddr *ifa)
 1796 {
 1797         KASSERT(ifa != NULL);
 1798         KASSERTMSG(ifa->ifa_refcnt > 0, "ifa_refcnt=%d", ifa->ifa_refcnt);
 1799 
 1800 #ifndef __HAVE_ATOMIC_AS_MEMBAR
 1801         membar_release();
 1802 #endif
 1803         if (atomic_dec_uint_nv(&ifa->ifa_refcnt) != 0)
 1804                 return;
 1805 #ifndef __HAVE_ATOMIC_AS_MEMBAR
 1806         membar_acquire();
 1807 #endif
 1808         free(ifa, M_IFADDR);
 1809 }
 1810 
 1811 bool
 1812 ifa_is_destroying(struct ifaddr *ifa)
 1813 {
 1814 
 1815         return ISSET(ifa->ifa_flags, IFA_DESTROYING);
 1816 }
 1817 
 1818 void
 1819 ifa_insert(struct ifnet *ifp, struct ifaddr *ifa)
 1820 {
 1821 
 1822         ifa->ifa_ifp = ifp;
 1823 
 1824         /*
 1825          * Check MP-safety for IFEF_MPSAFE drivers.
 1826          * Allow !IFF_RUNNING for initialization routines that normally don't
 1827          * take IFNET_LOCK; that is safe because there is no competitor yet.
 1828          * XXX there are false positives because IFF_RUNNING can also be off
 1829          * during if_stop.
 1830          */
 1831         KASSERT(!if_is_mpsafe(ifp) || !ISSET(ifp->if_flags, IFF_RUNNING) ||
 1832             IFNET_LOCKED(ifp));
 1833 
 1834         TAILQ_INSERT_TAIL(&ifp->if_addrlist, ifa, ifa_list);
 1835         IFADDR_ENTRY_INIT(ifa);
 1836         IFADDR_WRITER_INSERT_TAIL(ifp, ifa);
 1837 
 1838         ifaref(ifa);
 1839 }
 1840 
 1841 void
 1842 ifa_remove(struct ifnet *ifp, struct ifaddr *ifa)
 1843 {
 1844 
 1845         KASSERT(ifa->ifa_ifp == ifp);
 1846         /*
 1847          * Check MP-safety for IFEF_MPSAFE drivers.
 1848          * if_is_deactivated indicates ifa_remove is called from if_detach
 1849          * where it is safe even if IFNET_LOCK isn't held.
 1850          */
 1851         KASSERT(!if_is_mpsafe(ifp) || if_is_deactivated(ifp) ||
 1852             IFNET_LOCKED(ifp));
 1853 
 1854         TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list);
 1855         IFADDR_WRITER_REMOVE(ifa);
 1856 #ifdef NET_MPSAFE
 1857         IFNET_GLOBAL_LOCK();
 1858         pserialize_perform(ifnet_psz);
 1859         IFNET_GLOBAL_UNLOCK();
 1860 #endif
 1861 
 1862 #ifdef NET_MPSAFE
 1863         psref_target_destroy(&ifa->ifa_psref, ifa_psref_class);
 1864 #endif
 1865         IFADDR_ENTRY_DESTROY(ifa);
 1866         ifafree(ifa);
 1867 }
 1868 
 1869 void
 1870 ifa_acquire(struct ifaddr *ifa, struct psref *psref)
 1871 {
 1872 
 1873         PSREF_DEBUG_FILL_RETURN_ADDRESS(psref);
 1874         psref_acquire(psref, &ifa->ifa_psref, ifa_psref_class);
 1875 }
 1876 
 1877 void
 1878 ifa_release(struct ifaddr *ifa, struct psref *psref)
 1879 {
 1880 
 1881         if (ifa == NULL)
 1882                 return;
 1883 
 1884         psref_release(psref, &ifa->ifa_psref, ifa_psref_class);
 1885 }
 1886 
 1887 bool
 1888 ifa_held(struct ifaddr *ifa)
 1889 {
 1890 
 1891         return psref_held(&ifa->ifa_psref, ifa_psref_class);
 1892 }
 1893 
 1894 static inline int
 1895 equal(const struct sockaddr *sa1, const struct sockaddr *sa2)
 1896 {
 1897 
 1898         return sockaddr_cmp(sa1, sa2) == 0;
 1899 }
 1900 
 1901 /*
 1902  * Locate an interface based on a complete address.
 1903  */
 1904 /*ARGSUSED*/
 1905 struct ifaddr *
 1906 ifa_ifwithaddr(const struct sockaddr *addr)
 1907 {
 1908         struct ifnet *ifp;
 1909         struct ifaddr *ifa;
 1910 
 1911         IFNET_READER_FOREACH(ifp) {
 1912                 if (if_is_deactivated(ifp))
 1913                         continue;
 1914                 IFADDR_READER_FOREACH(ifa, ifp) {
 1915                         if (ifa->ifa_addr->sa_family != addr->sa_family)
 1916                                 continue;
 1917                         if (equal(addr, ifa->ifa_addr))
 1918                                 return ifa;
 1919                         if ((ifp->if_flags & IFF_BROADCAST) &&
 1920                             ifa->ifa_broadaddr &&
 1921                             /* IP6 doesn't have broadcast */
 1922                             ifa->ifa_broadaddr->sa_len != 0 &&
 1923                             equal(ifa->ifa_broadaddr, addr))
 1924                                 return ifa;
 1925                 }
 1926         }
 1927         return NULL;
 1928 }
 1929 
 1930 struct ifaddr *
 1931 ifa_ifwithaddr_psref(const struct sockaddr *addr, struct psref *psref)
 1932 {
 1933         struct ifaddr *ifa;
 1934         int s = pserialize_read_enter();
 1935 
 1936         ifa = ifa_ifwithaddr(addr);
 1937         if (ifa != NULL)
 1938                 ifa_acquire(ifa, psref);
 1939         pserialize_read_exit(s);
 1940 
 1941         return ifa;
 1942 }
 1943 
 1944 /*
 1945  * Locate the point to point interface with a given destination address.
 1946  */
 1947 /*ARGSUSED*/
 1948 struct ifaddr *
 1949 ifa_ifwithdstaddr(const struct sockaddr *addr)
 1950 {
 1951         struct ifnet *ifp;
 1952         struct ifaddr *ifa;
 1953 
 1954         IFNET_READER_FOREACH(ifp) {
 1955                 if (if_is_deactivated(ifp))
 1956                         continue;
 1957                 if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
 1958                         continue;
 1959                 IFADDR_READER_FOREACH(ifa, ifp) {
 1960                         if (ifa->ifa_addr->sa_family != addr->sa_family ||
 1961                             ifa->ifa_dstaddr == NULL)
 1962                                 continue;
 1963                         if (equal(addr, ifa->ifa_dstaddr))
 1964                                 return ifa;
 1965                 }
 1966         }
 1967 
 1968         return NULL;
 1969 }
 1970 
 1971 struct ifaddr *
 1972 ifa_ifwithdstaddr_psref(const struct sockaddr *addr, struct psref *psref)
 1973 {
 1974         struct ifaddr *ifa;
 1975         int s;
 1976 
 1977         s = pserialize_read_enter();
 1978         ifa = ifa_ifwithdstaddr(addr);
 1979         if (ifa != NULL)
 1980                 ifa_acquire(ifa, psref);
 1981         pserialize_read_exit(s);
 1982 
 1983         return ifa;
 1984 }
 1985 
 1986 /*
 1987  * Find an interface on a specific network.  If several match, the most
 1988  * specific one found is chosen.
 1989  */
 1990 struct ifaddr *
 1991 ifa_ifwithnet(const struct sockaddr *addr)
 1992 {
 1993         struct ifnet *ifp;
 1994         struct ifaddr *ifa, *ifa_maybe = NULL;
 1995         const struct sockaddr_dl *sdl;
 1996         u_int af = addr->sa_family;
 1997         const char *addr_data = addr->sa_data, *cplim;
 1998 
 1999         if (af == AF_LINK) {
 2000                 sdl = satocsdl(addr);
 2001                 if (sdl->sdl_index && sdl->sdl_index < if_indexlim &&
 2002                     ifindex2ifnet[sdl->sdl_index] &&
 2003                     !if_is_deactivated(ifindex2ifnet[sdl->sdl_index])) {
 2004                         return ifindex2ifnet[sdl->sdl_index]->if_dl;
 2005                 }
 2006         }
 2007 #ifdef NETATALK
 2008         if (af == AF_APPLETALK) {
 2009                 const struct sockaddr_at *sat, *sat2;
 2010                 sat = (const struct sockaddr_at *)addr;
 2011                 IFNET_READER_FOREACH(ifp) {
 2012                         if (if_is_deactivated(ifp))
 2013                                 continue;
 2014                         ifa = at_ifawithnet((const struct sockaddr_at *)addr,
 2015                             ifp);
 2016                         if (ifa == NULL)
 2017                                 continue;
 2018                         sat2 = (struct sockaddr_at *)ifa->ifa_addr;
 2019                         if (sat2->sat_addr.s_net == sat->sat_addr.s_net)
 2020                                 return ifa; /* exact match */
 2021                         if (ifa_maybe == NULL) {
 2022                                 /* else keep the if with the right range */
 2023                                 ifa_maybe = ifa;
 2024                         }
 2025                 }
 2026                 return ifa_maybe;
 2027         }
 2028 #endif
 2029         IFNET_READER_FOREACH(ifp) {
 2030                 if (if_is_deactivated(ifp))
 2031                         continue;
 2032                 IFADDR_READER_FOREACH(ifa, ifp) {
 2033                         const char *cp, *cp2, *cp3;
 2034 
 2035                         if (ifa->ifa_addr->sa_family != af ||
 2036                             ifa->ifa_netmask == NULL)
 2037  next:                          continue;
 2038                         cp = addr_data;
 2039                         cp2 = ifa->ifa_addr->sa_data;
 2040                         cp3 = ifa->ifa_netmask->sa_data;
 2041                         cplim = (const char *)ifa->ifa_netmask +
 2042                             ifa->ifa_netmask->sa_len;
 2043                         while (cp3 < cplim) {
 2044                                 if ((*cp++ ^ *cp2++) & *cp3++) {
 2045                                         /* want to continue for() loop */
 2046                                         goto next;
 2047                                 }
 2048                         }
 2049                         if (ifa_maybe == NULL ||
 2050                             rt_refines(ifa->ifa_netmask,
 2051                                        ifa_maybe->ifa_netmask))
 2052                                 ifa_maybe = ifa;
 2053                 }
 2054         }
 2055         return ifa_maybe;
 2056 }
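
      /*
       * Worked example (illustrative): querying for 192.0.2.5 against an
       * interface address 192.0.2.1 with netmask 255.255.255.0, every masked
       * byte of (addr ^ ifa_addr) is zero, so the loop above runs cp3 up to
       * cplim and the address matches.  A second candidate with netmask
       * 255.255.0.0 would also match but loses to the /24 because
       * rt_refines() prefers the more specific mask.
       */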
 2057 
 2058 struct ifaddr *
 2059 ifa_ifwithnet_psref(const struct sockaddr *addr, struct psref *psref)
 2060 {
 2061         struct ifaddr *ifa;
 2062         int s;
 2063 
 2064         s = pserialize_read_enter();
 2065         ifa = ifa_ifwithnet(addr);
 2066         if (ifa != NULL)
 2067                 ifa_acquire(ifa, psref);
 2068         pserialize_read_exit(s);
 2069 
 2070         return ifa;
 2071 }
 2072 
 2073 /*
 2074  * Find the interface of the address.
 2075  */
 2076 struct ifaddr *
 2077 ifa_ifwithladdr(const struct sockaddr *addr)
 2078 {
 2079         struct ifaddr *ia;
 2080 
 2081         if ((ia = ifa_ifwithaddr(addr)) || (ia = ifa_ifwithdstaddr(addr)) ||
 2082             (ia = ifa_ifwithnet(addr)))
 2083                 return ia;
 2084         return NULL;
 2085 }
 2086 
 2087 struct ifaddr *
 2088 ifa_ifwithladdr_psref(const struct sockaddr *addr, struct psref *psref)
 2089 {
 2090         struct ifaddr *ifa;
 2091         int s;
 2092 
 2093         s = pserialize_read_enter();
 2094         ifa = ifa_ifwithladdr(addr);
 2095         if (ifa != NULL)
 2096                 ifa_acquire(ifa, psref);
 2097         pserialize_read_exit(s);
 2098 
 2099         return ifa;
 2100 }
 2101 
 2102 /*
 2103  * Find an interface using a specific address family
 2104  */
 2105 struct ifaddr *
 2106 ifa_ifwithaf(int af)
 2107 {
 2108         struct ifnet *ifp;
 2109         struct ifaddr *ifa = NULL;
 2110         int s;
 2111 
 2112         s = pserialize_read_enter();
 2113         IFNET_READER_FOREACH(ifp) {
 2114                 if (if_is_deactivated(ifp))
 2115                         continue;
 2116                 IFADDR_READER_FOREACH(ifa, ifp) {
 2117                         if (ifa->ifa_addr->sa_family == af)
 2118                                 goto out;
 2119                 }
 2120         }
 2121 out:
 2122         pserialize_read_exit(s);
 2123         return ifa;
 2124 }
 2125 
 2126 /*
 2127  * Find an interface address on a specific interface that best matches
 2128  * a given address.
 2129  */
 2130 struct ifaddr *
 2131 ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp)
 2132 {
 2133         struct ifaddr *ifa;
 2134         const char *cp, *cp2, *cp3;
 2135         const char *cplim;
 2136         struct ifaddr *ifa_maybe = 0;
 2137         u_int af = addr->sa_family;
 2138 
 2139         if (if_is_deactivated(ifp))
 2140                 return NULL;
 2141 
 2142         if (af >= AF_MAX)
 2143                 return NULL;
 2144 
 2145         IFADDR_READER_FOREACH(ifa, ifp) {
 2146                 if (ifa->ifa_addr->sa_family != af)
 2147                         continue;
 2148                 ifa_maybe = ifa;
 2149                 if (ifa->ifa_netmask == NULL) {
 2150                         if (equal(addr, ifa->ifa_addr) ||
 2151                             (ifa->ifa_dstaddr &&
 2152                              equal(addr, ifa->ifa_dstaddr)))
 2153                                 return ifa;
 2154                         continue;
 2155                 }
 2156                 cp = addr->sa_data;
 2157                 cp2 = ifa->ifa_addr->sa_data;
 2158                 cp3 = ifa->ifa_netmask->sa_data;
 2159                 cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
 2160                 for (; cp3 < cplim; cp3++) {
 2161                         if ((*cp++ ^ *cp2++) & *cp3)
 2162                                 break;
 2163                 }
 2164                 if (cp3 == cplim)
 2165                         return ifa;
 2166         }
 2167         return ifa_maybe;
 2168 }
 2169 
 2170 struct ifaddr *
 2171 ifaof_ifpforaddr_psref(const struct sockaddr *addr, struct ifnet *ifp,
 2172     struct psref *psref)
 2173 {
 2174         struct ifaddr *ifa;
 2175         int s;
 2176 
 2177         s = pserialize_read_enter();
 2178         ifa = ifaof_ifpforaddr(addr, ifp);
 2179         if (ifa != NULL)
 2180                 ifa_acquire(ifa, psref);
 2181         pserialize_read_exit(s);
 2182 
 2183         return ifa;
 2184 }
 2185 
 2186 /*
 2187  * Default action when installing a route with a Link Level gateway.
 2188  * Look up an appropriate real ifa to point to.
 2189  * This should be moved to /sys/net/link.c eventually.
 2190  */
 2191 void
 2192 link_rtrequest(int cmd, struct rtentry *rt, const struct rt_addrinfo *info)
 2193 {
 2194         struct ifaddr *ifa;
 2195         const struct sockaddr *dst;
 2196         struct ifnet *ifp;
 2197         struct psref psref;
 2198 
 2199         if (cmd != RTM_ADD || ISSET(info->rti_flags, RTF_DONTCHANGEIFA))
 2200                 return;
 2201         ifp = rt->rt_ifa->ifa_ifp;
 2202         dst = rt_getkey(rt);
 2203         if ((ifa = ifaof_ifpforaddr_psref(dst, ifp, &psref)) != NULL) {
 2204                 rt_replace_ifa(rt, ifa);
 2205                 if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
 2206                         ifa->ifa_rtrequest(cmd, rt, info);
 2207                 ifa_release(ifa, &psref);
 2208         }
 2209 }
 2210 
 2211 /*
 2212  * bitmask macros to manage a densely packed link_state change queue.
 2213  * Because we need to store LINK_STATE_UNKNOWN(0), LINK_STATE_DOWN(1) and
 2214  * LINK_STATE_UP(2) we need 2 bits for each state change.
 2215  * Since a stored state change can itself be 0, an all-bits-set item means unset.
 2216  */
 2217 #define LQ_ITEM_BITS            2
 2218 #define LQ_ITEM_MASK            ((1 << LQ_ITEM_BITS) - 1)
 2219 #define LQ_MASK(i)              (LQ_ITEM_MASK << (i) * LQ_ITEM_BITS)
 2220 #define LINK_STATE_UNSET        LQ_ITEM_MASK
 2221 #define LQ_ITEM(q, i)           (((q) & LQ_MASK((i))) >> (i) * LQ_ITEM_BITS)
 2222 #define LQ_STORE(q, i, v)                                                     \
 2223         do {                                                                  \
 2224                 (q) &= ~LQ_MASK((i));                                         \
 2225                 (q) |= (v) << (i) * LQ_ITEM_BITS;                             \
 2226         } while (0 /* CONSTCOND */)
 2227 #define LQ_MAX(q)               ((sizeof((q)) * NBBY) / LQ_ITEM_BITS)
 2228 #define LQ_POP(q, v)                                                          \
 2229         do {                                                                  \
 2230                 (v) = LQ_ITEM((q), 0);                                        \
 2231                 (q) >>= LQ_ITEM_BITS;                                         \
 2232                 (q) |= LINK_STATE_UNSET << (LQ_MAX((q)) - 1) * LQ_ITEM_BITS;  \
 2233         } while (0 /* CONSTCOND */)
 2234 #define LQ_PUSH(q, v)                                                         \
 2235         do {                                                                  \
 2236                 (q) >>= LQ_ITEM_BITS;                                         \
 2237                 (q) |= (v) << (LQ_MAX((q)) - 1) * LQ_ITEM_BITS;               \
 2238         } while (0 /* CONSTCOND */)
 2239 #define LQ_FIND_UNSET(q, i)                                                   \
 2240         for ((i) = 0; i < LQ_MAX((q)); (i)++) {                               \
 2241                 if (LQ_ITEM((q), (i)) == LINK_STATE_UNSET)                    \
 2242                         break;                                                \
 2243         }
 2244 
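      /*
       * Worked example (illustrative, assuming a 16-bit if_link_queue, i.e.
       * LQ_MAX == 8 slots): an empty queue is all ones, so every slot reads
       * back as LINK_STATE_UNSET.
       *
       *      q = 0xffff;
       *      LQ_STORE(q, 0, LINK_STATE_UP);  // q == 0xfffe
       *      LQ_FIND_UNSET(q, i);            // i == 1, the first free slot
       *      LQ_POP(q, v);                   // v == LINK_STATE_UP, q == 0xffff
       */
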
 2245 /*
 2246  * Handle a change in the interface link state and
 2247  * queue notifications.
 2248  */
 2249 void
 2250 if_link_state_change(struct ifnet *ifp, int link_state)
 2251 {
 2252         int idx;
 2253 
 2254         /* Ensure change is to a valid state */
 2255         switch (link_state) {
 2256         case LINK_STATE_UNKNOWN:        /* FALLTHROUGH */
 2257         case LINK_STATE_DOWN:           /* FALLTHROUGH */
 2258         case LINK_STATE_UP:
 2259                 break;
 2260         default:
 2261 #ifdef DEBUG
 2262                 printf("%s: invalid link state %d\n",
 2263                     ifp->if_xname, link_state);
 2264 #endif
 2265                 return;
 2266         }
 2267 
 2268         IF_LINK_STATE_CHANGE_LOCK(ifp);
 2269 
 2270         /* Find the last unset event in the queue. */
 2271         LQ_FIND_UNSET(ifp->if_link_queue, idx);
 2272 
 2273         if (idx == 0) {
 2274                 /*
 2275                  * There is no queue of link state changes.
 2276                  * As we have the lock we can safely compare against the
 2277                  * current link state and return if the same.
 2278                  * Otherwise, if scheduled is true then the interface is being
 2279                  * detached and the queue is being drained so we need
 2280                  * to avoid queuing more work.
 2281                  */
 2282                  if (ifp->if_link_state == link_state ||
 2283                      ifp->if_link_scheduled)
 2284                         goto out;
 2285         } else {
 2286                 /* Ensure link_state doesn't match the last queued state. */
 2287                 if (LQ_ITEM(ifp->if_link_queue, idx - 1)
 2288                     == (uint8_t)link_state)
 2289                         goto out;
 2290         }
 2291 
 2292         /* Handle queue overflow. */
 2293         if (idx == LQ_MAX(ifp->if_link_queue)) {
 2294                 uint8_t lost;
 2295 
 2296                 /*
 2297                  * The DOWN state must be protected from being pushed off
 2298                  * the queue to ensure that userland will always be
 2299                  * in a sane state.
 2300                  * Because DOWN is protected, there is no need to protect
 2301                  * UNKNOWN.
 2302                  * It should be invalid to change from any other state to
 2303                  * UNKNOWN anyway ...
 2304                  */
 2305                 lost = LQ_ITEM(ifp->if_link_queue, 0);
 2306                 LQ_PUSH(ifp->if_link_queue, (uint8_t)link_state);
 2307                 if (lost == LINK_STATE_DOWN) {
 2308                         lost = LQ_ITEM(ifp->if_link_queue, 0);
 2309                         LQ_STORE(ifp->if_link_queue, 0, LINK_STATE_DOWN);
 2310                 }
 2311                 printf("%s: lost link state change %s\n",
 2312                     ifp->if_xname,
 2313                     lost == LINK_STATE_UP ? "UP" :
 2314                     lost == LINK_STATE_DOWN ? "DOWN" :
 2315                     "UNKNOWN");
 2316         } else
 2317                 LQ_STORE(ifp->if_link_queue, idx, (uint8_t)link_state);
 2318 
 2319         if (ifp->if_link_scheduled)
 2320                 goto out;
 2321 
 2322         ifp->if_link_scheduled = true;
 2323         workqueue_enqueue(ifnet_link_state_wq, &ifp->if_link_work, NULL);
 2324 
 2325 out:
 2326         IF_LINK_STATE_CHANGE_UNLOCK(ifp);
 2327 }
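
      /*
       * Caller sketch (illustrative; foo_softc and foo_phy_link_ok() are
       * hypothetical, and the usual struct ethercom embedding of the ifnet
       * is assumed): a driver reports transitions from its link interrupt or
       * MII tick, and this function coalesces them into the queue drained by
       * if_link_state_change_work().
       *
       *      static void
       *      foo_link_intr(struct foo_softc *sc)
       *      {
       *              struct ifnet *ifp = &sc->sc_ec.ec_if;
       *
       *              if_link_state_change(ifp,
       *                  foo_phy_link_ok(sc) ? LINK_STATE_UP : LINK_STATE_DOWN);
       *      }
       */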
 2328 
 2329 /*
 2330  * Handle interface link state change notifications.
 2331  */
 2332 static void
 2333 if_link_state_change_process(struct ifnet *ifp, int link_state)
 2334 {
 2335         struct domain *dp;
 2336         const int s = splnet();
 2337         bool notify;
 2338 
 2339         KASSERT(!cpu_intr_p());
 2340 
 2341         IF_LINK_STATE_CHANGE_LOCK(ifp);
 2342 
 2343         /* Ensure the change is still valid. */
 2344         if (ifp->if_link_state == link_state) {
 2345                 IF_LINK_STATE_CHANGE_UNLOCK(ifp);
 2346                 splx(s);
 2347                 return;
 2348         }
 2349 
 2350 #ifdef DEBUG
 2351         log(LOG_DEBUG, "%s: link state %s (was %s)\n", ifp->if_xname,
 2352                 link_state == LINK_STATE_UP ? "UP" :
 2353                 link_state == LINK_STATE_DOWN ? "DOWN" :
 2354                 "UNKNOWN",
 2355                 ifp->if_link_state == LINK_STATE_UP ? "UP" :
 2356                 ifp->if_link_state == LINK_STATE_DOWN ? "DOWN" :
 2357                 "UNKNOWN");
 2358 #endif
 2359 
 2360         /*
 2361          * When going from UNKNOWN to UP, we need to mark existing
 2362          * addresses as tentative and restart DAD as we may have
 2363          * erroneously not found a duplicate.
 2364          *
 2365          * This needs to happen before rt_ifmsg to avoid a race where
 2366          * listeners would have an address and expect it to work right
 2367          * away.
 2368          */
 2369         notify = (link_state == LINK_STATE_UP &&
 2370             ifp->if_link_state == LINK_STATE_UNKNOWN);
 2371         ifp->if_link_state = link_state;
 2372         /* The following routines may sleep so release the spin mutex */
 2373         IF_LINK_STATE_CHANGE_UNLOCK(ifp);
 2374 
 2375         KERNEL_LOCK_UNLESS_NET_MPSAFE();
 2376         if (notify) {
 2377                 DOMAIN_FOREACH(dp) {
 2378                         if (dp->dom_if_link_state_change != NULL)
 2379                                 dp->dom_if_link_state_change(ifp,
 2380                                     LINK_STATE_DOWN);
 2381                 }
 2382         }
 2383 
 2384         /* Notify that the link state has changed. */
 2385         rt_ifmsg(ifp);
 2386 
 2387         simplehook_dohooks(ifp->if_linkstate_hooks);
 2388 
 2389         DOMAIN_FOREACH(dp) {
 2390                 if (dp->dom_if_link_state_change != NULL)
 2391                         dp->dom_if_link_state_change(ifp, link_state);
 2392         }
 2393         KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
 2394         splx(s);
 2395 }
 2396 
 2397 /*
 2398  * Process the interface link state change queue.
 2399  */
 2400 static void
 2401 if_link_state_change_work(struct work *work, void *arg)
 2402 {
 2403         struct ifnet *ifp = container_of(work, struct ifnet, if_link_work);
 2404         uint8_t state;
 2405 
 2406         KERNEL_LOCK_UNLESS_NET_MPSAFE();
 2407         const int s = splnet();
 2408 
 2409         /*
 2410          * Pop a link state change from the queue and process it.
 2411          * If there is nothing to process then if_detach() has been called.
 2412          * We keep if_link_scheduled = true so the queue can safely drain
 2413          * without more work being queued.
 2414          */
 2415         IF_LINK_STATE_CHANGE_LOCK(ifp);
 2416         LQ_POP(ifp->if_link_queue, state);
 2417         IF_LINK_STATE_CHANGE_UNLOCK(ifp);
 2418         if (state == LINK_STATE_UNSET)
 2419                 goto out;
 2420 
 2421         if_link_state_change_process(ifp, state);
 2422 
 2423         /* If there is a link state change to come, schedule it. */
 2424         IF_LINK_STATE_CHANGE_LOCK(ifp);
 2425         if (LQ_ITEM(ifp->if_link_queue, 0) != LINK_STATE_UNSET) {
 2426                 ifp->if_link_scheduled = true;
 2427                 workqueue_enqueue(ifnet_link_state_wq, &ifp->if_link_work,
 2428                     NULL);
 2429         } else
 2430                 ifp->if_link_scheduled = false;
 2431         IF_LINK_STATE_CHANGE_UNLOCK(ifp);
 2432 
 2433 out:
 2434         splx(s);
 2435         KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
 2436 }
 2437 
 2438 void *
 2439 if_linkstate_change_establish(struct ifnet *ifp, void (*fn)(void *), void *arg)
 2440 {
 2441         khook_t *hk;
 2442 
 2443         hk = simplehook_establish(ifp->if_linkstate_hooks, fn, arg);
 2444 
 2445         return (void *)hk;
 2446 }
 2447 
 2448 void
 2449 if_linkstate_change_disestablish(struct ifnet *ifp, void *vhook,
 2450     kmutex_t *lock)
 2451 {
 2452 
 2453         simplehook_disestablish(ifp->if_linkstate_hooks, vhook, lock);
 2454 }
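
      /*
       * Usage sketch (illustrative; the callback and softc are hypothetical):
       * a consumer stacked on another interface can register a hook that runs
       * each time the parent's link state change is processed above.
       *
       *      sc->sc_linkstate_hook = if_linkstate_change_establish(parent_ifp,
       *          foo_parent_linkstate, sc);
       *      ...
       *      if_linkstate_change_disestablish(parent_ifp,
       *          sc->sc_linkstate_hook, NULL);
       *
       * Passing NULL for the lock assumes the callback needs no interlock
       * while being disestablished.
       */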
 2455 
 2456 /*
 2457  * Used to mark addresses on an interface as DETACHED or TENTATIVE
 2458  * and thus start Duplicate Address Detection without changing the
 2459  * real link state.
 2460  */
 2461 void
 2462 if_domain_link_state_change(struct ifnet *ifp, int link_state)
 2463 {
 2464         struct domain *dp;
 2465 
 2466         const int s = splnet();
 2467         KERNEL_LOCK_UNLESS_NET_MPSAFE();
 2468 
 2469         DOMAIN_FOREACH(dp) {
 2470                 if (dp->dom_if_link_state_change != NULL)
 2471                         dp->dom_if_link_state_change(ifp, link_state);
 2472         }
 2473 
 2474         splx(s);
 2475         KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
 2476 }
 2477 
 2478 /*
 2479  * Default action when installing a local route on a point-to-point
 2480  * interface.
 2481  */
 2482 void
 2483 p2p_rtrequest(int req, struct rtentry *rt,
 2484     __unused const struct rt_addrinfo *info)
 2485 {
 2486         struct ifnet *ifp = rt->rt_ifp;
 2487         struct ifaddr *ifa, *lo0ifa;
 2488         int s = pserialize_read_enter();
 2489 
 2490         switch (req) {
 2491         case RTM_ADD:
 2492                 if ((rt->rt_flags & RTF_LOCAL) == 0)
 2493                         break;
 2494 
 2495                 rt->rt_ifp = lo0ifp;
 2496 
 2497                 if (ISSET(info->rti_flags, RTF_DONTCHANGEIFA))
 2498                         break;
 2499 
 2500                 IFADDR_READER_FOREACH(ifa, ifp) {
 2501                         if (equal(rt_getkey(rt), ifa->ifa_addr))
 2502                                 break;
 2503                 }
 2504                 if (ifa == NULL)
 2505                         break;
 2506 
 2507                 /*
 2508                  * Ensure lo0 has an address of the same family.
 2509                  */
 2510                 IFADDR_READER_FOREACH(lo0ifa, lo0ifp) {
 2511                         if (lo0ifa->ifa_addr->sa_family ==
 2512                             ifa->ifa_addr->sa_family)
 2513                                 break;
 2514                 }
 2515                 if (lo0ifa == NULL)
 2516                         break;
 2517 
 2518                 /*
 2519                  * Make sure to set rt->rt_ifa to the interface
 2520                  * address we are using, otherwise we will have trouble
 2521                  * with source address selection.
 2522                  */
 2523                 if (ifa != rt->rt_ifa)
 2524                         rt_replace_ifa(rt, ifa);
 2525                 break;
 2526         case RTM_DELETE:
 2527         default:
 2528                 break;
 2529         }
 2530         pserialize_read_exit(s);
 2531 }
 2532 
 2533 static void
 2534 _if_down(struct ifnet *ifp)
 2535 {
 2536         struct ifaddr *ifa;
 2537         struct domain *dp;
 2538         struct psref psref;
 2539 
 2540         ifp->if_flags &= ~IFF_UP;
 2541         nanotime(&ifp->if_lastchange);
 2542 
 2543         const int bound = curlwp_bind();
 2544         int s = pserialize_read_enter();
 2545         IFADDR_READER_FOREACH(ifa, ifp) {
 2546                 ifa_acquire(ifa, &psref);
 2547                 pserialize_read_exit(s);
 2548 
 2549                 pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
 2550 
 2551                 s = pserialize_read_enter();
 2552                 ifa_release(ifa, &psref);
 2553         }
 2554         pserialize_read_exit(s);
 2555         curlwp_bindx(bound);
 2556 
 2557         IFQ_PURGE(&ifp->if_snd);
 2558 #if NCARP > 0
 2559         if (ifp->if_carp)
 2560                 carp_carpdev_state(ifp);
 2561 #endif
 2562         rt_ifmsg(ifp);
 2563         DOMAIN_FOREACH(dp) {
 2564                 if (dp->dom_if_down)
 2565                         dp->dom_if_down(ifp);
 2566         }
 2567 }
 2568 
 2569 static void
 2570 if_down_deactivated(struct ifnet *ifp)
 2571 {
 2572 
 2573         KASSERT(if_is_deactivated(ifp));
 2574         _if_down(ifp);
 2575 }
 2576 
 2577 void
 2578 if_down_locked(struct ifnet *ifp)
 2579 {
 2580 
 2581         KASSERT(IFNET_LOCKED(ifp));
 2582         _if_down(ifp);
 2583 }
 2584 
 2585 /*
 2586  * Mark an interface down and notify protocols of
 2587  * the transition.
 2588  * NOTE: must be called at splsoftnet or equivalent.
 2589  */
 2590 void
 2591 if_down(struct ifnet *ifp)
 2592 {
 2593 
 2594         IFNET_LOCK(ifp);
 2595         if_down_locked(ifp);
 2596         IFNET_UNLOCK(ifp);
 2597 }
 2598 
 2599 /*
 2600  * Must be called while holding if_ioctl_lock.
 2601  */
 2602 static void
 2603 if_up_locked(struct ifnet *ifp)
 2604 {
 2605 #ifdef notyet
 2606         struct ifaddr *ifa;
 2607 #endif
 2608         struct domain *dp;
 2609 
 2610         KASSERT(IFNET_LOCKED(ifp));
 2611 
 2612         KASSERT(!if_is_deactivated(ifp));
 2613         ifp->if_flags |= IFF_UP;
 2614         nanotime(&ifp->if_lastchange);
 2615 #ifdef notyet
 2616         /* this has no effect on IP, and will kill all ISO connections XXX */
 2617         IFADDR_READER_FOREACH(ifa, ifp)
 2618                 pfctlinput(PRC_IFUP, ifa->ifa_addr);
 2619 #endif
 2620 #if NCARP > 0
 2621         if (ifp->if_carp)
 2622                 carp_carpdev_state(ifp);
 2623 #endif
 2624         rt_ifmsg(ifp);
 2625         DOMAIN_FOREACH(dp) {
 2626                 if (dp->dom_if_up)
 2627                         dp->dom_if_up(ifp);
 2628         }
 2629 }
 2630 
 2631 /*
 2632  * Handle interface slowtimo timer routine.  Called
 2633  * from softclock, we decrement timer (if set) and
 2634  * call the appropriate interface routine on expiration.
 2635  */
 2636 static bool
 2637 if_slowtimo_countdown(struct ifnet *ifp)
 2638 {
 2639         bool fire = false;
 2640         const int s = splnet();
 2641 
 2642         KERNEL_LOCK(1, NULL);
 2643         if (ifp->if_timer != 0 && --ifp->if_timer == 0)
 2644                 fire = true;
 2645         KERNEL_UNLOCK_ONE(NULL);
 2646         splx(s);
 2647 
 2648         return fire;
 2649 }
 2650 
 2651 static void
 2652 if_slowtimo_intr(void *arg)
 2653 {
 2654         struct ifnet *ifp = arg;
 2655         struct if_slowtimo_data *isd = ifp->if_slowtimo_data;
 2656 
 2657         mutex_enter(&isd->isd_lock);
 2658         if (!isd->isd_dying) {
 2659                 if (isd->isd_trigger || if_slowtimo_countdown(ifp)) {
 2660                         if (!isd->isd_queued) {
 2661                                 isd->isd_queued = true;
 2662                                 workqueue_enqueue(if_slowtimo_wq,
 2663                                     &isd->isd_work, NULL);
 2664                         }
 2665                 } else
 2666                         callout_schedule(&isd->isd_ch, hz / IFNET_SLOWHZ);
 2667         }
 2668         mutex_exit(&isd->isd_lock);
 2669 }
 2670 
 2671 static void
 2672 if_slowtimo_work(struct work *work, void *arg)
 2673 {
 2674         struct if_slowtimo_data *isd =
 2675             container_of(work, struct if_slowtimo_data, isd_work);
 2676         struct ifnet *ifp = isd->isd_ifp;
 2677         const int s = splnet();
 2678 
 2679         KERNEL_LOCK(1, NULL);
 2680         (*ifp->if_slowtimo)(ifp);
 2681         KERNEL_UNLOCK_ONE(NULL);
 2682         splx(s);
 2683 
 2684         mutex_enter(&isd->isd_lock);
 2685         if (isd->isd_trigger) {
 2686                 isd->isd_trigger = false;
 2687                 printf("%s: watchdog triggered\n", ifp->if_xname);
 2688         }
 2689         isd->isd_queued = false;
 2690         if (!isd->isd_dying)
 2691                 callout_schedule(&isd->isd_ch, hz / IFNET_SLOWHZ);
 2692         mutex_exit(&isd->isd_lock);
 2693 }
 2694 
 2695 static int
 2696 sysctl_if_watchdog(SYSCTLFN_ARGS)
 2697 {
 2698         struct sysctlnode node = *rnode;
 2699         struct ifnet *ifp = node.sysctl_data;
 2700         struct if_slowtimo_data *isd = ifp->if_slowtimo_data;
 2701         int arg = 0;
 2702         int error;
 2703 
 2704         node.sysctl_data = &arg;
 2705         error = sysctl_lookup(SYSCTLFN_CALL(&node));
 2706         if (error || newp == NULL)
 2707                 return error;
 2708         if (arg) {
 2709                 mutex_enter(&isd->isd_lock);
 2710                 KASSERT(!isd->isd_dying);
 2711                 isd->isd_trigger = true;
 2712                 callout_schedule(&isd->isd_ch, 0);
 2713                 mutex_exit(&isd->isd_lock);
 2714         }
 2715 
 2716         return 0;
 2717 }
 2718 
 2719 static void
 2720 sysctl_watchdog_setup(struct ifnet *ifp)
 2721 {
 2722         struct sysctllog **clog = &ifp->if_sysctl_log;
 2723         const struct sysctlnode *rnode;
 2724 
 2725         if (sysctl_createv(clog, 0, NULL, &rnode,
 2726                 CTLFLAG_PERMANENT, CTLTYPE_NODE, "interfaces",
 2727                 SYSCTL_DESCR("Per-interface controls"),
 2728                 NULL, 0, NULL, 0,
 2729                 CTL_NET, CTL_CREATE, CTL_EOL) != 0)
 2730                 goto bad;
 2731         if (sysctl_createv(clog, 0, &rnode, &rnode,
 2732                 CTLFLAG_PERMANENT, CTLTYPE_NODE, ifp->if_xname,
 2733                 SYSCTL_DESCR("Interface controls"),
 2734                 NULL, 0, NULL, 0,
 2735                 CTL_CREATE, CTL_EOL) != 0)
 2736                 goto bad;
 2737         if (sysctl_createv(clog, 0, &rnode, &rnode,
 2738                 CTLFLAG_PERMANENT, CTLTYPE_NODE, "watchdog",
 2739                 SYSCTL_DESCR("Interface watchdog controls"),
 2740                 NULL, 0, NULL, 0,
 2741                 CTL_CREATE, CTL_EOL) != 0)
 2742                 goto bad;
 2743         if (sysctl_createv(clog, 0, &rnode, NULL,
 2744                 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "trigger",
 2745                 SYSCTL_DESCR("Trigger watchdog timeout"),
 2746                 sysctl_if_watchdog, 0, (int *)ifp, 0,
 2747                 CTL_CREATE, CTL_EOL) != 0)
 2748                 goto bad;
 2749 
 2750         return;
 2751 
 2752 bad:
 2753         printf("%s: could not attach sysctl watchdog nodes\n", ifp->if_xname);
 2754 }
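
      /*
       * The nodes created above surface as
       * net.interfaces.<ifname>.watchdog.trigger; writing a non-zero value
       * there sets isd_trigger and fires the watchdog through
       * sysctl_if_watchdog() and if_slowtimo_work().  Example (illustrative;
       * "wm0" is an arbitrary interface name):
       *
       *      sysctl -w net.interfaces.wm0.watchdog.trigger=1
       */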
 2755 
 2756 /*
 2757  * Mark an interface up and notify protocols of
 2758  * the transition.
 2759  * NOTE: must be called at splsoftnet or equivalent.
 2760  */
 2761 void
 2762 if_up(struct ifnet *ifp)
 2763 {
 2764 
 2765         IFNET_LOCK(ifp);
 2766         if_up_locked(ifp);
 2767         IFNET_UNLOCK(ifp);
 2768 }
 2769 
 2770 /*
 2771  * Set/clear promiscuous mode on interface ifp based on the truth value
 2772  * of pswitch.  The calls are reference counted so that only the first
 2773  * "on" request actually has an effect, as does the final "off" request.
 2774  * Results are undefined if the "off" and "on" requests are not matched.
 2775  */
 2776 int
 2777 ifpromisc_locked(struct ifnet *ifp, int pswitch)
 2778 {
 2779         int pcount, ret = 0;
 2780         u_short nflags;
 2781 
 2782         KASSERT(IFNET_LOCKED(ifp));
 2783 
 2784         pcount = ifp->if_pcount;
 2785         if (pswitch) {
 2786                 /*
 2787                  * Allow the device to be "placed" into promiscuous
 2788                  * mode even if it is not configured up.  It will
 2789                  * consult IFF_PROMISC when it is brought up.
 2790                  */
 2791                 if (ifp->if_pcount++ != 0)
 2792                         goto out;
 2793                 nflags = ifp->if_flags | IFF_PROMISC;
 2794         } else {
 2795                 if (--ifp->if_pcount > 0)
 2796                         goto out;
 2797                 nflags = ifp->if_flags & ~IFF_PROMISC;
 2798         }
 2799         ret = if_flags_set(ifp, nflags);
 2800         /* Restore interface state if not successful. */
 2801         if (ret != 0)
 2802                 ifp->if_pcount = pcount;
 2803 
 2804 out:
 2805         return ret;
 2806 }
 2807 
 2808 int
 2809 ifpromisc(struct ifnet *ifp, int pswitch)
 2810 {
 2811         int e;
 2812 
 2813         IFNET_LOCK(ifp);
 2814         e = ifpromisc_locked(ifp, pswitch);
 2815         IFNET_UNLOCK(ifp);
 2816 
 2817         return e;
 2818 }
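
      /*
       * Usage sketch (illustrative): because requests are reference counted,
       * nested consumers simply pair their calls and only the outermost
       * transition actually toggles IFF_PROMISC on the device.
       *
       *      error = ifpromisc(ifp, 1);      // first "on" sets IFF_PROMISC
       *      ...
       *      (void)ifpromisc(ifp, 0);        // last "off" clears it again
       */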
 2819 
 2820 /*
 2821  * if_ioctl(ifp, cmd, data)
 2822  *
 2823  *      Apply an ioctl command to the interface.  Returns 0 on success,
 2824  *      nonzero errno(3) number on failure.
 2825  *
 2826  *      For SIOCADDMULTI/SIOCDELMULTI, caller need not hold locks -- it
 2827  *      is the driver's responsibility to take any internal locks.
 2828  *      (Kernel logic should generally invoke these only through
 2829  *      if_mcast_op.)
 2830  *
 2831  *      For all other ioctls, caller must hold ifp->if_ioctl_lock,
 2832  *      a.k.a. IFNET_LOCK.  May sleep.
 2833  */
 2834 int
 2835 if_ioctl(struct ifnet *ifp, u_long cmd, void *data)
 2836 {
 2837 
 2838         switch (cmd) {
 2839         case SIOCADDMULTI:
 2840         case SIOCDELMULTI:
 2841                 break;
 2842         default:
 2843                 KASSERTMSG(IFNET_LOCKED(ifp), "%s", ifp->if_xname);
 2844         }
 2845 
 2846         return (*ifp->if_ioctl)(ifp, cmd, data);
 2847 }
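
      /*
       * Caller sketch (illustrative): apart from SIOCADDMULTI/SIOCDELMULTI,
       * the command must be issued under IFNET_LOCK, e.g.
       *
       *      IFNET_LOCK(ifp);
       *      error = if_ioctl(ifp, SIOCSIFMTU, &ifr);
       *      IFNET_UNLOCK(ifp);
       *
       * where ifr is a struct ifreq prepared by the caller.
       */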
 2848 
 2849 /*
 2850  * if_init(ifp)
 2851  *
 2852  *      Prepare the hardware underlying ifp to process packets
 2853  *      according to its current configuration.  Returns 0 on success,
 2854  *      nonzero errno(3) number on failure.
 2855  *
 2856  *      May sleep.  Caller must hold ifp->if_ioctl_lock, a.k.a
 2857  *      IFNET_LOCK.
 2858  */
 2859 int
 2860 if_init(struct ifnet *ifp)
 2861 {
 2862 
 2863         KASSERTMSG(IFNET_LOCKED(ifp), "%s", ifp->if_xname);
 2864 
 2865         return (*ifp->if_init)(ifp);
 2866 }
 2867 
 2868 /*
 2869  * if_stop(ifp, disable)
 2870  *
 2871  *      Stop the hardware underlying ifp from processing packets.
 2872  *
 2873  *      If disable is true, ... XXX(?)
 2874  *
 2875  *      May sleep.  Caller must hold ifp->if_ioctl_lock, a.k.a
 2876  *      IFNET_LOCK.
 2877  */
 2878 void
 2879 if_stop(struct ifnet *ifp, int disable)
 2880 {
 2881 
 2882         KASSERTMSG(IFNET_LOCKED(ifp), "%s", ifp->if_xname);
 2883 
 2884         (*ifp->if_stop)(ifp, disable);
 2885 }
 2886 
 2887 /*
 2888  * Map interface name to
 2889  * interface structure pointer.
 2890  */
 2891 struct ifnet *
 2892 ifunit(const char *name)
 2893 {
 2894         struct ifnet *ifp;
 2895         const char *cp = name;
 2896         u_int unit = 0;
 2897         u_int i;
 2898 
 2899         /*
 2900          * If the entire name is a number, treat it as an ifindex.
 2901          */
 2902         for (i = 0; i < IFNAMSIZ && *cp >= '0' && *cp <= '9'; i++, cp++)
 2903                 unit = unit * 10 + (*cp - '0');
 2904 
 2905         /*
 2906          * If the number took all of the name, then it's a valid ifindex.
 2907          */
 2908         if (i == IFNAMSIZ || (cp != name && *cp == '\0'))
 2909                 return if_byindex(unit);
 2910 
 2911         ifp = NULL;
 2912         const int s = pserialize_read_enter();
 2913         IFNET_READER_FOREACH(ifp) {
 2914                 if (if_is_deactivated(ifp))
 2915                         continue;
 2916                 if (strcmp(ifp->if_xname, name) == 0)
 2917                         goto out;
 2918         }
 2919 out:
 2920         pserialize_read_exit(s);
 2921         return ifp;
 2922 }
 2923 
 2924 /*
 2925  * Get a reference of an ifnet object by an interface name.
 2926  * must release the returned reference with if_put after use.
 2927  * must release a returned reference by if_put after use.
 2928  */
 2929 struct ifnet *
 2930 if_get(const char *name, struct psref *psref)
 2931 {
 2932         struct ifnet *ifp;
 2933         const char *cp = name;
 2934         u_int unit = 0;
 2935         u_int i;
 2936 
 2937         /*
 2938          * If the entire name is a number, treat it as an ifindex.
 2939          */
 2940         for (i = 0; i < IFNAMSIZ && *cp >= '0' && *cp <= '9'; i++, cp++)
 2941                 unit = unit * 10 + (*cp - '0');
 2942 
 2943         /*
 2944          * If the number took all of the name, then it's a valid ifindex.
 2945          */
 2946         if (i == IFNAMSIZ || (cp != name && *cp == '\0'))
 2947                 return if_get_byindex(unit, psref);
 2948 
 2949         ifp = NULL;
 2950         const int s = pserialize_read_enter();
 2951         IFNET_READER_FOREACH(ifp) {
 2952                 if (if_is_deactivated(ifp))
 2953                         continue;
 2954                 if (strcmp(ifp->if_xname, name) == 0) {
 2955                         PSREF_DEBUG_FILL_RETURN_ADDRESS(psref);
 2956                         psref_acquire(psref, &ifp->if_psref,
 2957                             ifnet_psref_class);
 2958                         goto out;
 2959                 }
 2960         }
 2961 out:
 2962         pserialize_read_exit(s);
 2963         return ifp;
 2964 }
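
      /*
       * Usage sketch (illustrative; the interface name is arbitrary): psref(9)
       * requires the caller to stay bound to its CPU while the reference is
       * held, hence the curlwp_bind()/curlwp_bindx() bracket.
       *
       *      struct psref psref;
       *      struct ifnet *ifp;
       *      const int bound = curlwp_bind();
       *
       *      ifp = if_get("lo0", &psref);
       *      if (ifp != NULL) {
       *              ... use ifp ...
       *              if_put(ifp, &psref);
       *      }
       *      curlwp_bindx(bound);
       */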
 2965 
 2966 /*
 2967  * Release a reference of an ifnet object given by if_get, if_get_byindex
 2968  * or if_get_bylla.
 2969  */
 2970 void
 2971 if_put(const struct ifnet *ifp, struct psref *psref)
 2972 {
 2973 
 2974         if (ifp == NULL)
 2975                 return;
 2976 
 2977         psref_release(psref, &ifp->if_psref, ifnet_psref_class);
 2978 }
 2979 
 2980 /*
 2981  * Return ifp having idx. Return NULL if not found.  Normally if_byindex
 2982  * should be used.
 2983  */
 2984 ifnet_t *
 2985 _if_byindex(u_int idx)
 2986 {
 2987 
 2988         return (__predict_true(idx < if_indexlim)) ? ifindex2ifnet[idx] : NULL;
 2989 }
 2990 
 2991 /*
 2992  * Return ifp having idx. Return NULL if not found or the found ifp is
 2993  * already deactivated.
 2994  */
 2995 ifnet_t *
 2996 if_byindex(u_int idx)
 2997 {
 2998         ifnet_t *ifp;
 2999 
 3000         ifp = _if_byindex(idx);
 3001         if (ifp != NULL && if_is_deactivated(ifp))
 3002                 ifp = NULL;
 3003         return ifp;
 3004 }
 3005 
 3006 /*
 3007  * Get a reference of an ifnet object by an interface index.
 3008  * The returned reference is protected by psref(9). The caller
 3009  * must release the returned reference with if_put after use.
 3010  */
 3011 ifnet_t *
 3012 if_get_byindex(u_int idx, struct psref *psref)
 3013 {
 3014         ifnet_t *ifp;
 3015 
 3016         const int s = pserialize_read_enter();
 3017         ifp = if_byindex(idx);
 3018         if (__predict_true(ifp != NULL)) {
 3019                 PSREF_DEBUG_FILL_RETURN_ADDRESS(psref);
 3020                 psref_acquire(psref, &ifp->if_psref, ifnet_psref_class);
 3021         }
 3022         pserialize_read_exit(s);
 3023 
 3024         return ifp;
 3025 }
 3026 
 3027 ifnet_t *
 3028 if_get_bylla(const void *lla, unsigned char lla_len, struct psref *psref)
 3029 {
 3030         ifnet_t *ifp;
 3031 
 3032         const int s = pserialize_read_enter();
 3033         IFNET_READER_FOREACH(ifp) {
 3034                 if (if_is_deactivated(ifp))
 3035                         continue;
 3036                 if (ifp->if_addrlen != lla_len)
 3037                         continue;
 3038                 if (memcmp(lla, CLLADDR(ifp->if_sadl), lla_len) == 0) {
 3039                         psref_acquire(psref, &ifp->if_psref,
 3040                             ifnet_psref_class);
 3041                         break;
 3042                 }
 3043         }
 3044         pserialize_read_exit(s);
 3045 
 3046         return ifp;
 3047 }
 3048 
 3049 /*
 3050  * Note that this is safe only if the passed ifp is guaranteed not to be
 3051  * freed, for example when pserialize is in use, the ifp is already held,
 3052  * or some other held object indirectly guarantees that the ifp is not freed.
 3053  */
 3054 void
 3055 if_acquire(struct ifnet *ifp, struct psref *psref)
 3056 {
 3057 
 3058         KASSERT(ifp->if_index != 0);
 3059         psref_acquire(psref, &ifp->if_psref, ifnet_psref_class);
 3060 }
 3061 
 3062 bool
 3063 if_held(struct ifnet *ifp)
 3064 {
 3065 
 3066         return psref_held(&ifp->if_psref, ifnet_psref_class);
 3067 }
 3068 
 3069 /*
 3070  * Some tunnel interfaces can nest, e.g. IPv4 over IPv4 gif(4) tunnel over
 3071  * IPv4. Check the tunnel nesting count.
 3072  * Return > 0, if tunnel nesting count is more than limit.
 3073  * Return 0, if tunnel nesting count is equal or less than limit.
 3074  */
 3075 int
 3076 if_tunnel_check_nesting(struct ifnet *ifp, struct mbuf *m, int limit)
 3077 {
 3078         struct m_tag *mtag;
 3079         int *count;
 3080 
 3081         mtag = m_tag_find(m, PACKET_TAG_TUNNEL_INFO);
 3082         if (mtag != NULL) {
 3083                 count = (int *)(mtag + 1);
 3084                 if (++(*count) > limit) {
 3085                         log(LOG_NOTICE,
 3086                             "%s: recursively called too many times (%d)\n",
 3087                             ifp->if_xname, *count);
 3088                         return EIO;
 3089                 }
 3090         } else {
 3091                 mtag = m_tag_get(PACKET_TAG_TUNNEL_INFO, sizeof(*count),
 3092                     M_NOWAIT);
 3093                 if (mtag != NULL) {
 3094                         m_tag_prepend(m, mtag);
 3095                         count = (int *)(mtag + 1);
 3096                         *count = 0;
 3097                 } else {
 3098                         log(LOG_DEBUG, "%s: m_tag_get() failed, "
 3099                             "recursive calls are not prevented.\n",
 3100                             ifp->if_xname);
 3101                 }
 3102         }
 3103 
 3104         return 0;
 3105 }
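
      /*
       * Caller sketch (illustrative; FOO_MAX_NEST is hypothetical): a tunnel's
       * output path checks the per-packet count before encapsulating, so a
       * misconfigured tunnel-over-tunnel loop is cut off with EIO instead of
       * recursing indefinitely.
       *
       *      error = if_tunnel_check_nesting(ifp, m, FOO_MAX_NEST);
       *      if (error != 0) {
       *              m_freem(m);
       *              return error;
       *      }
       */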
 3106 
 3107 static void
 3108 if_tunnel_ro_init_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
 3109 {
 3110         struct tunnel_ro *tro = p;
 3111 
 3112         tro->tr_ro = kmem_zalloc(sizeof(*tro->tr_ro), KM_SLEEP);
 3113         tro->tr_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
 3114 }
 3115 
 3116 static void
 3117 if_tunnel_ro_fini_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
 3118 {
 3119         struct tunnel_ro *tro = p;
 3120 
 3121         rtcache_free(tro->tr_ro);
 3122         kmem_free(tro->tr_ro, sizeof(*tro->tr_ro));
 3123 
 3124         mutex_obj_free(tro->tr_lock);
 3125 }
 3126 
 3127 percpu_t *
 3128 if_tunnel_alloc_ro_percpu(void)
 3129 {
 3130 
 3131         return percpu_create(sizeof(struct tunnel_ro),
 3132             if_tunnel_ro_init_pc, if_tunnel_ro_fini_pc, NULL);
 3133 }
 3134 
 3135 void
 3136 if_tunnel_free_ro_percpu(percpu_t *ro_percpu)
 3137 {
 3138 
 3139         percpu_free(ro_percpu, sizeof(struct tunnel_ro));
 3140 }
 3141 
 3142 
 3143 static void
 3144 if_tunnel_rtcache_free_pc(void *p, void *arg __unused,
 3145     struct cpu_info *ci __unused)
 3146 {
 3147         struct tunnel_ro *tro = p;
 3148 
 3149         mutex_enter(tro->tr_lock);
 3150         rtcache_free(tro->tr_ro);
 3151         mutex_exit(tro->tr_lock);
 3152 }
 3153 
 3154 void if_tunnel_ro_percpu_rtcache_free(percpu_t *ro_percpu)
 3155 {
 3156 
 3157         percpu_foreach(ro_percpu, if_tunnel_rtcache_free_pc, NULL);
 3158 }
 3159 
 3160 void
 3161 if_export_if_data(ifnet_t * const ifp, struct if_data *ifi, bool zero_stats)
 3162 {
 3163 
 3164         /* Collect the volatile stats first; this zeros *ifi. */
 3165         if_stats_to_if_data(ifp, ifi, zero_stats);
 3166 
 3167         ifi->ifi_type = ifp->if_type;
 3168         ifi->ifi_addrlen = ifp->if_addrlen;
 3169         ifi->ifi_hdrlen = ifp->if_hdrlen;
 3170         ifi->ifi_link_state = ifp->if_link_state;
 3171         ifi->ifi_mtu = ifp->if_mtu;
 3172         ifi->ifi_metric = ifp->if_metric;
 3173         ifi->ifi_baudrate = ifp->if_baudrate;
 3174         ifi->ifi_lastchange = ifp->if_lastchange;
 3175 }
 3176 
 3177 /* common */
 3178 int
 3179 ifioctl_common(struct ifnet *ifp, u_long cmd, void *data)
 3180 {
 3181         struct ifreq *ifr;
 3182         struct ifcapreq *ifcr;
 3183         struct ifdatareq *ifdr;
 3184         unsigned short flags;
 3185         char *descr;
 3186         int error;
 3187 
 3188         switch (cmd) {
 3189         case SIOCSIFCAP:
 3190                 ifcr = data;
 3191                 if ((ifcr->ifcr_capenable & ~ifp->if_capabilities) != 0)
 3192                         return EINVAL;
 3193 
 3194                 if (ifcr->ifcr_capenable == ifp->if_capenable)
 3195                         return 0;
 3196 
 3197                 ifp->if_capenable = ifcr->ifcr_capenable;
 3198 
 3199                 /* Pre-compute the checksum flags mask. */
 3200                 ifp->if_csum_flags_tx = 0;
 3201                 ifp->if_csum_flags_rx = 0;
 3202                 if (ifp->if_capenable & IFCAP_CSUM_IPv4_Tx)
 3203                         ifp->if_csum_flags_tx |= M_CSUM_IPv4;
 3204                 if (ifp->if_capenable & IFCAP_CSUM_IPv4_Rx)
 3205                         ifp->if_csum_flags_rx |= M_CSUM_IPv4;
 3206 
 3207                 if (ifp->if_capenable & IFCAP_CSUM_TCPv4_Tx)
 3208                         ifp->if_csum_flags_tx |= M_CSUM_TCPv4;
 3209                 if (ifp->if_capenable & IFCAP_CSUM_TCPv4_Rx)
 3210                         ifp->if_csum_flags_rx |= M_CSUM_TCPv4;
 3211 
 3212                 if (ifp->if_capenable & IFCAP_CSUM_UDPv4_Tx)
 3213                         ifp->if_csum_flags_tx |= M_CSUM_UDPv4;
 3214                 if (ifp->if_capenable & IFCAP_CSUM_UDPv4_Rx)
 3215                         ifp->if_csum_flags_rx |= M_CSUM_UDPv4;
 3216 
 3217                 if (ifp->if_capenable & IFCAP_CSUM_TCPv6_Tx)
 3218                         ifp->if_csum_flags_tx |= M_CSUM_TCPv6;
 3219                 if (ifp->if_capenable & IFCAP_CSUM_TCPv6_Rx)
 3220                         ifp->if_csum_flags_rx |= M_CSUM_TCPv6;
 3221 
 3222                 if (ifp->if_capenable & IFCAP_CSUM_UDPv6_Tx)
 3223                         ifp->if_csum_flags_tx |= M_CSUM_UDPv6;
 3224                 if (ifp->if_capenable & IFCAP_CSUM_UDPv6_Rx)
 3225                         ifp->if_csum_flags_rx |= M_CSUM_UDPv6;
 3226 
 3227                 if (ifp->if_capenable & IFCAP_TSOv4)
 3228                         ifp->if_csum_flags_tx |= M_CSUM_TSOv4;
 3229                 if (ifp->if_capenable & IFCAP_TSOv6)
 3230                         ifp->if_csum_flags_tx |= M_CSUM_TSOv6;
 3231 
 3232 #if NBRIDGE > 0
 3233                 if (ifp->if_bridge != NULL)
 3234                         bridge_calc_csum_flags(ifp->if_bridge);
 3235 #endif
 3236 
 3237                 if (ifp->if_flags & IFF_UP)
 3238                         return ENETRESET;
 3239                 return 0;
 3240         case SIOCSIFFLAGS:
 3241                 ifr = data;
 3242                 /*
 3243                  * If if_is_mpsafe(ifp), KERNEL_LOCK isn't held here, but if_up
 3244                  * and if_down aren't MP-safe yet, so we must hold the lock.
 3245                  */
 3246                 KERNEL_LOCK_IF_IFP_MPSAFE(ifp);
 3247                 if (ifp->if_flags & IFF_UP && (ifr->ifr_flags & IFF_UP) == 0) {
 3248                         const int s = splsoftnet();
 3249                         if_down_locked(ifp);
 3250                         splx(s);
 3251                 }
 3252                 if (ifr->ifr_flags & IFF_UP && (ifp->if_flags & IFF_UP) == 0) {
 3253                         const int s = splsoftnet();
 3254                         if_up_locked(ifp);
 3255                         splx(s);
 3256                 }
 3257                 KERNEL_UNLOCK_IF_IFP_MPSAFE(ifp);
 3258                 flags = (ifp->if_flags & IFF_CANTCHANGE) |
 3259                     (ifr->ifr_flags &~ IFF_CANTCHANGE);
 3260                 if (ifp->if_flags != flags) {
 3261                         ifp->if_flags = flags;
 3262                         /* Notify that the flags have changed. */
 3263                         rt_ifmsg(ifp);
 3264                 }
 3265                 break;
 3266         case SIOCGIFFLAGS:
 3267                 ifr = data;
 3268                 ifr->ifr_flags = ifp->if_flags;
 3269                 break;
 3270 
 3271         case SIOCGIFMETRIC:
 3272                 ifr = data;
 3273                 ifr->ifr_metric = ifp->if_metric;
 3274                 break;
 3275 
 3276         case SIOCGIFMTU:
 3277                 ifr = data;
 3278                 ifr->ifr_mtu = ifp->if_mtu;
 3279                 break;
 3280 
 3281         case SIOCGIFDLT:
 3282                 ifr = data;
 3283                 ifr->ifr_dlt = ifp->if_dlt;
 3284                 break;
 3285 
 3286         case SIOCGIFCAP:
 3287                 ifcr = data;
 3288                 ifcr->ifcr_capabilities = ifp->if_capabilities;
 3289                 ifcr->ifcr_capenable = ifp->if_capenable;
 3290                 break;
 3291 
 3292         case SIOCSIFMETRIC:
 3293                 ifr = data;
 3294                 ifp->if_metric = ifr->ifr_metric;
 3295                 break;
 3296 
 3297         case SIOCGIFDATA:
 3298                 ifdr = data;
 3299                 if_export_if_data(ifp, &ifdr->ifdr_data, false);
 3300                 break;
 3301 
 3302         case SIOCGIFINDEX:
 3303                 ifr = data;
 3304                 ifr->ifr_index = ifp->if_index;
 3305                 break;
 3306 
 3307         case SIOCZIFDATA:
 3308                 ifdr = data;
 3309                 if_export_if_data(ifp, &ifdr->ifdr_data, true);
 3310                 getnanotime(&ifp->if_lastchange);
 3311                 break;
 3312         case SIOCSIFMTU:
 3313                 ifr = data;
 3314                 if (ifp->if_mtu == ifr->ifr_mtu)
 3315                         break;
 3316                 ifp->if_mtu = ifr->ifr_mtu;
 3317                 return ENETRESET;
 3318         case SIOCSIFDESCR:
 3319                 error = kauth_authorize_network(kauth_cred_get(),
 3320                     KAUTH_NETWORK_INTERFACE,
 3321                     KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, KAUTH_ARG(cmd),
 3322                     NULL);
 3323                 if (error)
 3324                         return error;
 3325 
 3326                 ifr = data;
 3327 
 3328                 if (ifr->ifr_buflen > IFDESCRSIZE)
 3329                         return ENAMETOOLONG;
 3330 
 3331                 if (ifr->ifr_buf == NULL || ifr->ifr_buflen == 0) {
 3332                         /* unset description */
 3333                         descr = NULL;
 3334                 } else {
 3335                         descr = kmem_zalloc(IFDESCRSIZE, KM_SLEEP);
 3336                         /*
 3337                          * copy (IFDESCRSIZE - 1) bytes to ensure
 3338                          * terminating nul
 3339                          */
 3340                         error = copyin(ifr->ifr_buf, descr, IFDESCRSIZE - 1);
 3341                         if (error) {
 3342                                 kmem_free(descr, IFDESCRSIZE);
 3343                                 return error;
 3344                         }
 3345                 }
 3346 
 3347                 if (ifp->if_description != NULL)
 3348                         kmem_free(ifp->if_description, IFDESCRSIZE);
 3349 
 3350                 ifp->if_description = descr;
 3351                 break;
 3352 
 3353         case SIOCGIFDESCR:
 3354                 ifr = data;
 3355                 descr = ifp->if_description;
 3356 
 3357                 if (descr == NULL)
 3358                         return ENOMSG;
 3359 
 3360                 if (ifr->ifr_buflen < IFDESCRSIZE)
 3361                         return EINVAL;
 3362 
 3363                 error = copyout(descr, ifr->ifr_buf, IFDESCRSIZE);
 3364                 if (error)
 3365                         return error;
 3366                 break;
 3367 
 3368         default:
 3369                 return ENOTTY;
 3370         }
 3371         return 0;
 3372 }
 3373 
 3374 int
 3375 ifaddrpref_ioctl(struct socket *so, u_long cmd, void *data, struct ifnet *ifp)
 3376 {
 3377         struct if_addrprefreq *ifap = (struct if_addrprefreq *)data;
 3378         struct ifaddr *ifa;
 3379         const struct sockaddr *any, *sa;
 3380         union {
 3381                 struct sockaddr sa;
 3382                 struct sockaddr_storage ss;
 3383         } u, v;
 3384         int s, error = 0;
 3385 
 3386         switch (cmd) {
 3387         case SIOCSIFADDRPREF:
 3388                 error = kauth_authorize_network(kauth_cred_get(),
 3389                     KAUTH_NETWORK_INTERFACE,
 3390                     KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, KAUTH_ARG(cmd),
 3391                     NULL);
 3392                 if (error)
 3393                         return error;
 3394                 break;
 3395         case SIOCGIFADDRPREF:
 3396                 break;
 3397         default:
 3398                 return EOPNOTSUPP;
 3399         }
 3400 
 3401         /* sanity checks */
 3402         if (data == NULL || ifp == NULL) {
 3403                 panic("invalid argument to %s", __func__);
 3404                 /*NOTREACHED*/
 3405         }
 3406 
 3407         /* address must be specified on ADD and DELETE */
 3408         sa = sstocsa(&ifap->ifap_addr);
 3409         if (sa->sa_family != sofamily(so))
 3410                 return EINVAL;
 3411         if ((any = sockaddr_any(sa)) == NULL || sa->sa_len != any->sa_len)
 3412                 return EINVAL;
 3413 
 3414         sockaddr_externalize(&v.sa, sizeof(v.ss), sa);
 3415 
 3416         s = pserialize_read_enter();
 3417         IFADDR_READER_FOREACH(ifa, ifp) {
 3418                 if (ifa->ifa_addr->sa_family != sa->sa_family)
 3419                         continue;
 3420                 sockaddr_externalize(&u.sa, sizeof(u.ss), ifa->ifa_addr);
 3421                 if (sockaddr_cmp(&u.sa, &v.sa) == 0)
 3422                         break;
 3423         }
 3424         if (ifa == NULL) {
 3425                 error = EADDRNOTAVAIL;
 3426                 goto out;
 3427         }
 3428 
 3429         switch (cmd) {
 3430         case SIOCSIFADDRPREF:
 3431                 ifa->ifa_preference = ifap->ifap_preference;
 3432                 goto out;
 3433         case SIOCGIFADDRPREF:
 3434                 /* fill in the if_addrprefreq structure */
 3435                 (void)sockaddr_copy(sstosa(&ifap->ifap_addr),
 3436                     sizeof(ifap->ifap_addr), ifa->ifa_addr);
 3437                 ifap->ifap_preference = ifa->ifa_preference;
 3438                 goto out;
 3439         default:
 3440                 error = EOPNOTSUPP;
 3441         }
 3442 out:
 3443         pserialize_read_exit(s);
 3444         return error;
 3445 }
 3446 
 3447 /*
 3448  * Interface ioctls.
 3449  */
 3450 static int
 3451 doifioctl(struct socket *so, u_long cmd, void *data, struct lwp *l)
 3452 {
 3453         struct ifnet *ifp;
 3454         struct ifreq *ifr;
 3455         int error = 0;
 3456         u_long ocmd = cmd;
 3457         u_short oif_flags;
 3458         struct ifreq ifrb;
 3459         struct oifreq *oifr = NULL;
 3460         int r;
 3461         struct psref psref;
 3462         bool do_if43_post = false;
 3463         bool do_ifm80_post = false;
 3464 
 3465         switch (cmd) {
 3466         case SIOCGIFCONF:
 3467                 return ifconf(cmd, data);
 3468         case SIOCINITIFADDR:
 3469                 return EPERM;
 3470         default:
 3471                 MODULE_HOOK_CALL(uipc_syscalls_40_hook, (cmd, data), enosys(),
 3472                     error);
 3473                 if (error != ENOSYS)
 3474                         return error;
 3475                 MODULE_HOOK_CALL(uipc_syscalls_50_hook, (l, cmd, data),
 3476                     enosys(), error);
 3477                 if (error != ENOSYS)
 3478                         return error;
 3479                 error = 0;
 3480                 break;
 3481         }
 3482 
 3483         ifr = data;
 3484         /* Pre-conversion */
 3485         MODULE_HOOK_CALL(if_cvtcmd_43_hook, (&cmd, ocmd), enosys(), error);
 3486         if (cmd != ocmd) {
 3487                 oifr = data;
 3488                 data = ifr = &ifrb;
 3489                 IFREQO2N_43(oifr, ifr);
 3490                 do_if43_post = true;
 3491         }
 3492         MODULE_HOOK_CALL(ifmedia_80_pre_hook, (ifr, &cmd, &do_ifm80_post),
 3493             enosys(), error);
 3494 
 3495         switch (cmd) {
 3496         case SIOCIFCREATE:
 3497         case SIOCIFDESTROY: {
 3498                 const int bound = curlwp_bind();
 3499                 if (l != NULL) {
 3500                         ifp = if_get(ifr->ifr_name, &psref);
 3501                         error = kauth_authorize_network(l->l_cred,
 3502                             KAUTH_NETWORK_INTERFACE,
 3503                             KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp,
 3504                             KAUTH_ARG(cmd), NULL);
 3505                         if (ifp != NULL)
 3506                                 if_put(ifp, &psref);
 3507                         if (error != 0) {
 3508                                 curlwp_bindx(bound);
 3509                                 return error;
 3510                         }
 3511                 }
 3512                 KERNEL_LOCK_UNLESS_NET_MPSAFE();
 3513                 mutex_enter(&if_clone_mtx);
 3514                 r = (cmd == SIOCIFCREATE) ?
 3515                         if_clone_create(ifr->ifr_name) :
 3516                         if_clone_destroy(ifr->ifr_name);
 3517                 mutex_exit(&if_clone_mtx);
 3518                 KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
 3519                 curlwp_bindx(bound);
 3520                 return r;
 3521             }
 3522         case SIOCIFGCLONERS: {
 3523                 struct if_clonereq *req = (struct if_clonereq *)data;
 3524                 return if_clone_list(req->ifcr_count, req->ifcr_buffer,
 3525                     &req->ifcr_total);
 3526             }
 3527         }
 3528 
 3529         if ((cmd & IOC_IN) == 0 || IOCPARM_LEN(cmd) < sizeof(ifr->ifr_name))
 3530                 return EINVAL;
 3531 
 3532         const int bound = curlwp_bind();
 3533         ifp = if_get(ifr->ifr_name, &psref);
 3534         if (ifp == NULL) {
 3535                 curlwp_bindx(bound);
 3536                 return ENXIO;
 3537         }
 3538 
 3539         switch (cmd) {
 3540         case SIOCALIFADDR:
 3541         case SIOCDLIFADDR:
 3542         case SIOCSIFADDRPREF:
 3543         case SIOCSIFFLAGS:
 3544         case SIOCSIFCAP:
 3545         case SIOCSIFMETRIC:
 3546         case SIOCZIFDATA:
 3547         case SIOCSIFMTU:
 3548         case SIOCSIFPHYADDR:
 3549         case SIOCDIFPHYADDR:
 3550 #ifdef INET6
 3551         case SIOCSIFPHYADDR_IN6:
 3552 #endif
 3553         case SIOCSLIFPHYADDR:
 3554         case SIOCADDMULTI:
 3555         case SIOCDELMULTI:
 3556         case SIOCSETHERCAP:
 3557         case SIOCSIFMEDIA:
 3558         case SIOCSDRVSPEC:
 3559         case SIOCG80211:
 3560         case SIOCS80211:
 3561         case SIOCS80211NWID:
 3562         case SIOCS80211NWKEY:
 3563         case SIOCS80211POWER:
 3564         case SIOCS80211BSSID:
 3565         case SIOCS80211CHANNEL:
 3566         case SIOCSLINKSTR:
 3567                 if (l != NULL) {
 3568                         error = kauth_authorize_network(l->l_cred,
 3569                             KAUTH_NETWORK_INTERFACE,
 3570                             KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp,
 3571                             KAUTH_ARG(cmd), NULL);
 3572                         if (error != 0)
 3573                                 goto out;
 3574                 }
 3575         }
 3576 
 3577         oif_flags = ifp->if_flags;
 3578 
 3579         KERNEL_LOCK_UNLESS_IFP_MPSAFE(ifp);
 3580         IFNET_LOCK(ifp);
 3581 
 3582         error = if_ioctl(ifp, cmd, data);
 3583         if (error != ENOTTY)
 3584                 ;
 3585         else if (so->so_proto == NULL)
 3586                 error = EOPNOTSUPP;
 3587         else {
 3588                 KERNEL_LOCK_IF_IFP_MPSAFE(ifp);
 3589                 MODULE_HOOK_CALL(if_ifioctl_43_hook,
 3590                              (so, ocmd, cmd, data, l), enosys(), error);
 3591                 if (error == ENOSYS)
 3592                         error = (*so->so_proto->pr_usrreqs->pr_ioctl)(so,
 3593                             cmd, data, ifp);
 3594                 KERNEL_UNLOCK_IF_IFP_MPSAFE(ifp);
 3595         }
 3596 
 3597         if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0) {
 3598                 if ((ifp->if_flags & IFF_UP) != 0) {
 3599                         const int s = splsoftnet();
 3600                         if_up_locked(ifp);
 3601                         splx(s);
 3602                 }
 3603         }
 3604 
 3605         /* Post-conversion */
 3606         if (do_ifm80_post && (error == 0))
 3607                 MODULE_HOOK_CALL(ifmedia_80_post_hook, (ifr, cmd),
 3608                     enosys(), error);
 3609         if (do_if43_post)
 3610                 IFREQN2O_43(oifr, ifr);
 3611 
 3612         IFNET_UNLOCK(ifp);
 3613         KERNEL_UNLOCK_UNLESS_IFP_MPSAFE(ifp);
 3614 out:
 3615         if_put(ifp, &psref);
 3616         curlwp_bindx(bound);
 3617         return error;
 3618 }
 3619 
 3620 /*
 3621  * Return the interface configuration of
 3622  * the system.  The list may be used in
 3623  * later ioctls (above) to get other
 3624  * information.
 3625  *
 3626  * Each record is a struct ifreq.  Before the addition of
 3627  * sockaddr_storage, the API rule was that sockaddr flavors that did
 3628  * not fit would extend beyond the struct ifreq, with the next struct
 3629  * ifreq starting sa_len beyond the struct sockaddr.  Because the
 3630  * union in struct ifreq includes struct sockaddr_storage, every kind
 3631  * of sockaddr must fit.  Thus, there are no longer any overlength
 3632  * records.
 3633  *
 3634  * Records are added to the user buffer if they fit, and ifc_len is
 3635  * adjusted to the length that was written.  Thus, the user is only
 3636  * assured of getting the complete list if ifc_len on return is at
 3637  * least sizeof(struct ifreq) less than it was on entry.
 3638  *
 3639  * If the user buffer pointer is NULL, this routine copies no data and
 3640  * returns the amount of space that would be needed.
 3641  *
 3642  * Invariants:
 3643  * ifrp points to the next part of the user's buffer to be used.  If
 3644  * ifrp != NULL, space holds the number of bytes remaining that we may
 3645  * write at ifrp.  Otherwise, space holds the number of bytes that
 3646  * would have been written had there been adequate space.
 3647  */
 3648 /*ARGSUSED*/
 3649 static int
 3650 ifconf(u_long cmd, void *data)
 3651 {
 3652         struct ifconf *ifc = (struct ifconf *)data;
 3653         struct ifnet *ifp;
 3654         struct ifaddr *ifa;
 3655         struct ifreq ifr, *ifrp = NULL;
 3656         int space = 0, error = 0;
 3657         const int sz = (int)sizeof(struct ifreq);
 3658         const bool docopy = ifc->ifc_req != NULL;
 3659         struct psref psref;
 3660 
 3661         if (docopy) {
 3662                 if (ifc->ifc_len < 0)
 3663                         return EINVAL;
 3664 
 3665                 space = ifc->ifc_len;
 3666                 ifrp = ifc->ifc_req;
 3667         }
 3668         memset(&ifr, 0, sizeof(ifr));
 3669 
 3670         const int bound = curlwp_bind();
 3671         int s = pserialize_read_enter();
 3672         IFNET_READER_FOREACH(ifp) {
 3673                 psref_acquire(&psref, &ifp->if_psref, ifnet_psref_class);
 3674                 pserialize_read_exit(s);
 3675 
 3676                 (void)strncpy(ifr.ifr_name, ifp->if_xname,
 3677                     sizeof(ifr.ifr_name));
 3678                 if (ifr.ifr_name[sizeof(ifr.ifr_name) - 1] != '\0') {
 3679                         error = ENAMETOOLONG;
 3680                         goto release_exit;
 3681                 }
 3682                 if (IFADDR_READER_EMPTY(ifp)) {
 3683                         /* Interface with no addresses - send zero sockaddr. */
 3684                         memset(&ifr.ifr_addr, 0, sizeof(ifr.ifr_addr));
 3685                         if (!docopy) {
 3686                                 space += sz;
 3687                                 goto next;
 3688                         }
 3689                         if (space >= sz) {
 3690                                 error = copyout(&ifr, ifrp, sz);
 3691                                 if (error != 0)
 3692                                         goto release_exit;
 3693                                 ifrp++;
 3694                                 space -= sz;
 3695                         }
 3696                 }
 3697 
 3698                 s = pserialize_read_enter();
 3699                 IFADDR_READER_FOREACH(ifa, ifp) {
 3700                         struct sockaddr *sa = ifa->ifa_addr;
 3701                         /* all sockaddrs must fit in sockaddr_storage */
 3702                         KASSERT(sa->sa_len <= sizeof(ifr.ifr_ifru));
 3703 
 3704                         if (!docopy) {
 3705                                 space += sz;
 3706                                 continue;
 3707                         }
 3708                         memcpy(&ifr.ifr_space, sa, sa->sa_len);
 3709                         pserialize_read_exit(s);
 3710 
 3711                         if (space >= sz) {
 3712                                 error = copyout(&ifr, ifrp, sz);
 3713                                 if (error != 0)
 3714                                         goto release_exit;
 3715                                 ifrp++; space -= sz;
 3716                         }
 3717                         s = pserialize_read_enter();
 3718                 }
 3719                 pserialize_read_exit(s);
 3720 
 3721 next:
 3722                 s = pserialize_read_enter();
 3723                 psref_release(&psref, &ifp->if_psref, ifnet_psref_class);
 3724         }
 3725         pserialize_read_exit(s);
 3726         curlwp_bindx(bound);
 3727 
 3728         if (docopy) {
 3729                 KASSERT(0 <= space && space <= ifc->ifc_len);
 3730                 ifc->ifc_len -= space;
 3731         } else {
 3732                 KASSERT(space >= 0);
 3733                 ifc->ifc_len = space;
 3734         }
 3735         return 0;
 3736 
 3737 release_exit:
 3738         psref_release(&psref, &ifp->if_psref, ifnet_psref_class);
 3739         curlwp_bindx(bound);
 3740         return error;
 3741 }
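
/*
 * Illustrative sketch (not part of this file): the SIOCGIFCONF contract
 * described above can be exercised from userland by first passing a NULL
 * buffer to learn the space required, then allocating and repeating the
 * ioctl.  The function and variable names below are assumptions made for
 * the example only.
 */
#if 0   /* example only; not compiled as part of if.c */
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <stdio.h>
#include <stdlib.h>

static int
example_list_interfaces(int s)  /* s: any open socket, e.g. AF_INET/SOCK_DGRAM */
{
        struct ifconf ifc;

        /* First pass: a NULL buffer makes ifconf() report the needed size. */
        ifc.ifc_req = NULL;
        ifc.ifc_len = 0;
        if (ioctl(s, SIOCGIFCONF, &ifc) == -1)
                return -1;

        if ((ifc.ifc_req = malloc(ifc.ifc_len)) == NULL)
                return -1;

        /* Second pass: ifc_len is reduced to the bytes actually written. */
        if (ioctl(s, SIOCGIFCONF, &ifc) == -1) {
                free(ifc.ifc_req);
                return -1;
        }

        /* Records are fixed-size struct ifreq; there are no overlength records. */
        for (size_t i = 0; i < ifc.ifc_len / sizeof(struct ifreq); i++)
                printf("%s\n", ifc.ifc_req[i].ifr_name);

        free(ifc.ifc_req);
        return 0;
}
#endif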
 3742 
 3743 int
 3744 ifreq_setaddr(u_long cmd, struct ifreq *ifr, const struct sockaddr *sa)
 3745 {
 3746         uint8_t len = sizeof(ifr->ifr_ifru.ifru_space);
 3747         struct ifreq ifrb;
 3748         struct oifreq *oifr = NULL;
 3749         u_long ocmd = cmd;
 3750         int hook;
 3751 
 3752         MODULE_HOOK_CALL(if_cvtcmd_43_hook, (&cmd, ocmd), enosys(), hook);
 3753         if (hook != ENOSYS) {
 3754                 if (cmd != ocmd) {
 3755                         oifr = (struct oifreq *)(void *)ifr;
 3756                         ifr = &ifrb;
 3757                         IFREQO2N_43(oifr, ifr);
 3758                         len = sizeof(oifr->ifr_addr);
 3759                 }
 3760         }
 3761 
 3762         if (len < sa->sa_len)
 3763                 return EFBIG;
 3764 
 3765         memset(&ifr->ifr_addr, 0, len);
 3766         sockaddr_copy(&ifr->ifr_addr, len, sa);
 3767 
 3768         if (cmd != ocmd)
 3769                 IFREQN2O_43(oifr, ifr);
 3770         return 0;
 3771 }
 3772 
 3773 /*
 3774  * Wrapper function for drivers that do not provide if_transmit().
 3775  */
 3776 static int
 3777 if_transmit(struct ifnet *ifp, struct mbuf *m)
 3778 {
 3779         int error;
 3780         size_t pktlen = m->m_pkthdr.len;
 3781         bool mcast = (m->m_flags & M_MCAST) != 0;
 3782 
 3783         const int s = splnet();
 3784 
 3785         IFQ_ENQUEUE(&ifp->if_snd, m, error);
 3786         if (error != 0) {
 3787                 /* mbuf is already freed */
 3788                 goto out;
 3789         }
 3790 
 3791         net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
 3792         if_statadd_ref(nsr, if_obytes, pktlen);
 3793         if (mcast)
 3794                 if_statinc_ref(nsr, if_omcasts);
 3795         IF_STAT_PUTREF(ifp);
 3796 
 3797         if ((ifp->if_flags & IFF_OACTIVE) == 0)
 3798                 if_start_lock(ifp);
 3799 out:
 3800         splx(s);
 3801 
 3802         return error;
 3803 }
 3804 
 3805 int
 3806 if_transmit_lock(struct ifnet *ifp, struct mbuf *m)
 3807 {
 3808         int error;
 3809 
 3810         kmsan_check_mbuf(m);
 3811 
 3812 #ifdef ALTQ
 3813         KERNEL_LOCK(1, NULL);
 3814         if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
 3815                 error = if_transmit(ifp, m);
 3816                 KERNEL_UNLOCK_ONE(NULL);
 3817         } else {
 3818                 KERNEL_UNLOCK_ONE(NULL);
 3819                 error = (*ifp->if_transmit)(ifp, m);
 3820                 /* mbuf is already freed */
 3821         }
 3822 #else /* !ALTQ */
 3823         error = (*ifp->if_transmit)(ifp, m);
 3824         /* mbuf is already freed */
 3825 #endif /* !ALTQ */
 3826 
 3827         return error;
 3828 }
 3829 
 3830 /*
 3831  * Queue message on interface, and start output if interface
 3832  * not yet active.
 3833  */
 3834 int
 3835 ifq_enqueue(struct ifnet *ifp, struct mbuf *m)
 3836 {
 3837 
 3838         return if_transmit_lock(ifp, m);
 3839 }
 3840 
 3841 /*
 3842  * Queue message on interface, possibly using a second fast queue
 3843  */
 3844 int
 3845 ifq_enqueue2(struct ifnet *ifp, struct ifqueue *ifq, struct mbuf *m)
 3846 {
 3847         int error = 0;
 3848 
 3849         if (ifq != NULL
 3850 #ifdef ALTQ
 3851             && ALTQ_IS_ENABLED(&ifp->if_snd) == 0
 3852 #endif
 3853             ) {
 3854                 if (IF_QFULL(ifq)) {
 3855                         IF_DROP(&ifp->if_snd);
 3856                         m_freem(m);
 3857                         if (error == 0)
 3858                                 error = ENOBUFS;
 3859                 } else
 3860                         IF_ENQUEUE(ifq, m);
 3861         } else
 3862                 IFQ_ENQUEUE(&ifp->if_snd, m, error);
 3863         if (error != 0) {
 3864                 if_statinc(ifp, if_oerrors);
 3865                 return error;
 3866         }
 3867         return 0;
 3868 }
 3869 
 3870 int
 3871 if_addr_init(ifnet_t *ifp, struct ifaddr *ifa, const bool src)
 3872 {
 3873         int rc;
 3874 
 3875         KASSERT(IFNET_LOCKED(ifp));
 3876         if (ifp->if_initaddr != NULL)
 3877                 rc = (*ifp->if_initaddr)(ifp, ifa, src);
 3878         else if (src || (rc = if_ioctl(ifp, SIOCSIFDSTADDR, ifa)) == ENOTTY)
 3879                 rc = if_ioctl(ifp, SIOCINITIFADDR, ifa);
 3880 
 3881         return rc;
 3882 }
 3883 
 3884 int
 3885 if_do_dad(struct ifnet *ifp)
 3886 {
 3887         if ((ifp->if_flags & IFF_LOOPBACK) != 0)
 3888                 return 0;
 3889 
 3890         switch (ifp->if_type) {
 3891         case IFT_FAITH:
 3892                 /*
 3893                  * These interfaces do not carry the IFF_LOOPBACK flag, but
 3894                  * they still loop packets back.  DAD is unnecessary on such
 3895                  * interfaces and should even be omitted, because loop-backed
 3896                  * responses would confuse the DAD procedure.
 3897                  */
 3898                 return 0;
 3899         default:
 3900                 /*
 3901                  * Our DAD routine requires the interface to be up and running.
 3902                  * However, some interfaces can be up before reaching RUNNING
 3903                  * status.  Additionally, users may try to assign addresses
 3904                  * before the interface becomes up (or running).
 3905                  * We simply skip DAD in such cases as a workaround.
 3906                  * XXX: we should rather mark "tentative" on such addresses,
 3907                  * and do DAD after the interface becomes ready.
 3908                  */
 3909                 if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) !=
 3910                     (IFF_UP | IFF_RUNNING))
 3911                         return 0;
 3912 
 3913                 return 1;
 3914         }
 3915 }
 3916 
 3917 /*
 3918  * if_flags_set(ifp, flags)
 3919  *
 3920  *      Ask ifp to change ifp->if_flags to flags, as if with the
 3921  *      SIOCSIFFLAGS ioctl command.
 3922  *
 3923  *      May sleep.  Caller must hold ifp->if_ioctl_lock, a.k.a
 3924  *      IFNET_LOCK.
 3925  */
 3926 int
 3927 if_flags_set(ifnet_t *ifp, const u_short flags)
 3928 {
 3929         int rc;
 3930 
 3931         KASSERT(IFNET_LOCKED(ifp));
 3932 
 3933         if (ifp->if_setflags != NULL)
 3934                 rc = (*ifp->if_setflags)(ifp, flags);
 3935         else {
 3936                 u_short cantflags, chgdflags;
 3937                 struct ifreq ifr;
 3938 
 3939                 chgdflags = ifp->if_flags ^ flags;
 3940                 cantflags = chgdflags & IFF_CANTCHANGE;
 3941 
 3942                 if (cantflags != 0)
 3943                         ifp->if_flags ^= cantflags;
 3944 
 3945                 /*
 3946                  * Traditionally, we do not call if_ioctl after
 3947                  * setting/clearing only IFF_PROMISC if the interface
 3948                  * isn't IFF_UP.  Uphold that tradition.
 3949                  */
 3950                 if (chgdflags == IFF_PROMISC && (ifp->if_flags & IFF_UP) == 0)
 3951                         return 0;
 3952 
 3953                 memset(&ifr, 0, sizeof(ifr));
 3954 
 3955                 ifr.ifr_flags = flags & ~IFF_CANTCHANGE;
 3956                 rc = if_ioctl(ifp, SIOCSIFFLAGS, &ifr);
 3957 
 3958                 if (rc != 0 && cantflags != 0)
 3959                         ifp->if_flags ^= cantflags;
 3960         }
 3961 
 3962         return rc;
 3963 }
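
/*
 * Illustrative sketch (assumption, not an excerpt from the tree): an
 * in-kernel caller that wants to raise IFF_PROMISC would take IFNET_LOCK
 * and hand the new flag set to if_flags_set(), which falls back to a
 * SIOCSIFFLAGS if_ioctl() call when the driver provides no if_setflags.
 */
#if 0   /* example only; not compiled as part of if.c */
static int
example_set_promisc(struct ifnet *ifp)
{
        int error;

        IFNET_LOCK(ifp);        /* required: see the comment above */
        error = if_flags_set(ifp, ifp->if_flags | IFF_PROMISC);
        IFNET_UNLOCK(ifp);
        return error;
}
#endif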
 3964 
 3965 /*
 3966  * if_mcast_op(ifp, cmd, sa)
 3967  *
 3968  *      Apply a multicast command, SIOCADDMULTI/SIOCDELMULTI, to the
 3969  *      interface.  Returns 0 on success, nonzero errno(3) number on
 3970  *      failure.
 3971  *
 3972  *      May sleep.
 3973  *
 3974  *      Use this, not if_ioctl, for the multicast commands.
 3975  */
 3976 int
 3977 if_mcast_op(ifnet_t *ifp, const unsigned long cmd, const struct sockaddr *sa)
 3978 {
 3979         int rc;
 3980         struct ifreq ifr;
 3981 
 3982         switch (cmd) {
 3983         case SIOCADDMULTI:
 3984         case SIOCDELMULTI:
 3985                 break;
 3986         default:
 3987                 panic("invalid ifnet multicast command: 0x%lx", cmd);
 3988         }
 3989 
 3990         ifreq_setaddr(cmd, &ifr, sa);
 3991         rc = if_ioctl(ifp, cmd, &ifr);
 3992 
 3993         return rc;
 3994 }
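
/*
 * Illustrative sketch (assumption, not an excerpt from the tree): a
 * protocol joining a link-layer multicast group packs the group address
 * into a sockaddr and calls if_mcast_op() with SIOCADDMULTI, rather than
 * calling if_ioctl() directly.
 */
#if 0   /* example only; not compiled as part of if.c */
static int
example_join_group(struct ifnet *ifp, const struct sockaddr *group)
{

        return if_mcast_op(ifp, SIOCADDMULTI, group);
}
#endif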
 3995 
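/*
 * Descriptive comment added for clarity (not in the original file):
 *
 * sysctl_sndq_setup(clog, ifname, ifq)
 *
 *      Attach net.interfaces.<ifname>.sndq.{len,maxlen,drops} sysctl
 *      nodes describing the interface's output queue.
 */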
 3996 static void
 3997 sysctl_sndq_setup(struct sysctllog **clog, const char *ifname,
 3998     struct ifaltq *ifq)
 3999 {
 4000         const struct sysctlnode *cnode, *rnode;
 4001 
 4002         if (sysctl_createv(clog, 0, NULL, &rnode,
 4003                        CTLFLAG_PERMANENT,
 4004                        CTLTYPE_NODE, "interfaces",
 4005                        SYSCTL_DESCR("Per-interface controls"),
 4006                        NULL, 0, NULL, 0,
 4007                        CTL_NET, CTL_CREATE, CTL_EOL) != 0)
 4008                 goto bad;
 4009 
 4010         if (sysctl_createv(clog, 0, &rnode, &rnode,
 4011                        CTLFLAG_PERMANENT,
 4012                        CTLTYPE_NODE, ifname,
 4013                        SYSCTL_DESCR("Interface controls"),
 4014                        NULL, 0, NULL, 0,
 4015                        CTL_CREATE, CTL_EOL) != 0)
 4016                 goto bad;
 4017 
 4018         if (sysctl_createv(clog, 0, &rnode, &rnode,
 4019                        CTLFLAG_PERMANENT,
 4020                        CTLTYPE_NODE, "sndq",
 4021                        SYSCTL_DESCR("Interface output queue controls"),
 4022                        NULL, 0, NULL, 0,
 4023                        CTL_CREATE, CTL_EOL) != 0)
 4024                 goto bad;
 4025 
 4026         if (sysctl_createv(clog, 0, &rnode, &cnode,
 4027                        CTLFLAG_PERMANENT,
 4028                        CTLTYPE_INT, "len",
 4029                        SYSCTL_DESCR("Current output queue length"),
 4030                        NULL, 0, &ifq->ifq_len, 0,
 4031                        CTL_CREATE, CTL_EOL) != 0)
 4032                 goto bad;
 4033 
 4034         if (sysctl_createv(clog, 0, &rnode, &cnode,
 4035                        CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
 4036                        CTLTYPE_INT, "maxlen",
 4037                        SYSCTL_DESCR("Maximum allowed output queue length"),
 4038                        NULL, 0, &ifq->ifq_maxlen, 0,
 4039                        CTL_CREATE, CTL_EOL) != 0)
 4040                 goto bad;
 4041 
 4042         if (sysctl_createv(clog, 0, &rnode, &cnode,
 4043                        CTLFLAG_PERMANENT,
 4044                        CTLTYPE_QUAD, "drops",
 4045                        SYSCTL_DESCR("Packets dropped due to full output queue"),
 4046                        NULL, 0, &ifq->ifq_drops, 0,
 4047                        CTL_CREATE, CTL_EOL) != 0)
 4048                 goto bad;
 4049 
 4050         return;
 4051 bad:
 4052         printf("%s: could not attach sysctl nodes\n", ifname);
 4053         return;
 4054 }
 4055 
 4056 static int
 4057 if_sdl_sysctl(SYSCTLFN_ARGS)
 4058 {
 4059         struct ifnet *ifp;
 4060         const struct sockaddr_dl *sdl;
 4061         struct psref psref;
 4062         int error = 0;
 4063 
 4064         if (namelen != 1)
 4065                 return EINVAL;
 4066 
 4067         const int bound = curlwp_bind();
 4068         ifp = if_get_byindex(name[0], &psref);
 4069         if (ifp == NULL) {
 4070                 error = ENODEV;
 4071                 goto out0;
 4072         }
 4073 
 4074         sdl = ifp->if_sadl;
 4075         if (sdl == NULL) {
 4076                 *oldlenp = 0;
 4077                 goto out1;
 4078         }
 4079 
 4080         if (oldp == NULL) {
 4081                 *oldlenp = sdl->sdl_alen;
 4082                 goto out1;
 4083         }
 4084 
 4085         if (*oldlenp >= sdl->sdl_alen)
 4086                 *oldlenp = sdl->sdl_alen;
 4087         error = sysctl_copyout(l, &sdl->sdl_data[sdl->sdl_nlen],
 4088             oldp, *oldlenp);
 4089 out1:
 4090         if_put(ifp, &psref);
 4091 out0:
 4092         curlwp_bindx(bound);
 4093         return error;
 4094 }
 4095 
 4096 static void
 4097 if_sysctl_setup(struct sysctllog **clog)
 4098 {
 4099         const struct sysctlnode *rnode = NULL;
 4100 
 4101         sysctl_createv(clog, 0, NULL, &rnode,
 4102                        CTLFLAG_PERMANENT,
 4103                        CTLTYPE_NODE, "sdl",
 4104                        SYSCTL_DESCR("Get active link-layer address"),
 4105                        if_sdl_sysctl, 0, NULL, 0,
 4106                        CTL_NET, CTL_CREATE, CTL_EOL);
 4107 }
