The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/net/if.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 1980, 1986, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  * 3. Neither the name of the University nor the names of its contributors
   14  *    may be used to endorse or promote products derived from this software
   15  *    without specific prior written permission.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  *
   29  *      @(#)if.c        8.3 (Berkeley) 1/4/94
   30  * $FreeBSD: src/sys/net/if.c,v 1.185 2004/03/13 02:35:03 brooks Exp $
   31  */
   32 
   33 #include "opt_compat.h"
   34 #include "opt_inet6.h"
   35 #include "opt_inet.h"
   36 #include "opt_ifpoll.h"
   37 
   38 #include <sys/param.h>
   39 #include <sys/malloc.h>
   40 #include <sys/mbuf.h>
   41 #include <sys/systm.h>
   42 #include <sys/proc.h>
   43 #include <sys/priv.h>
   44 #include <sys/protosw.h>
   45 #include <sys/socket.h>
   46 #include <sys/socketvar.h>
   47 #include <sys/socketops.h>
   48 #include <sys/kernel.h>
   49 #include <sys/ktr.h>
   50 #include <sys/mutex.h>
   51 #include <sys/sockio.h>
   52 #include <sys/syslog.h>
   53 #include <sys/sysctl.h>
   54 #include <sys/domain.h>
   55 #include <sys/thread.h>
   56 #include <sys/serialize.h>
   57 #include <sys/bus.h>
   58 
   59 #include <sys/thread2.h>
   60 #include <sys/msgport2.h>
   61 #include <sys/mutex2.h>
   62 
   63 #include <net/if.h>
   64 #include <net/if_arp.h>
   65 #include <net/if_dl.h>
   66 #include <net/if_types.h>
   67 #include <net/if_var.h>
   68 #include <net/ifq_var.h>
   69 #include <net/radix.h>
   70 #include <net/route.h>
   71 #include <net/if_clone.h>
   72 #include <net/netisr2.h>
   73 #include <net/netmsg2.h>
   74 
   75 #include <machine/atomic.h>
   76 #include <machine/stdarg.h>
   77 #include <machine/smp.h>
   78 
   79 #if defined(INET) || defined(INET6)
   80 /*XXX*/
   81 #include <netinet/in.h>
   82 #include <netinet/in_var.h>
   83 #include <netinet/if_ether.h>
   84 #ifdef INET6
   85 #include <netinet6/in6_var.h>
   86 #include <netinet6/in6_ifattach.h>
   87 #endif
   88 #endif
   89 
   90 #if defined(COMPAT_43)
   91 #include <emulation/43bsd/43bsd_socket.h>
   92 #endif /* COMPAT_43 */
   93 
   94 struct netmsg_ifaddr {
              /*
               * Netmsg that pairs an ifaddr with an ifnet.  Its users are not
               * visible in this part of the file; presumably it carries ifaddr
               * link/unlink requests between CPUs -- TODO confirm.
               */
   95         struct netmsg_base base;        /* netmsg header (first member) */
   96         struct ifaddr   *ifa;           /* address the message refers to */
   97         struct ifnet    *ifp;           /* interface the message refers to */
   98         int             tail;           /* NOTE(review): looks like a head/tail insert selector -- confirm */
   99 };
  100 
  101 struct ifsubq_stage_head {
              /*
               * Head of a list of ifsubq_stage entries.  One head exists per CPU
               * (ifsubq_stage_heads[MAXCPU]) and it is always indexed with
               * mycpuid in this file.  Entries are added by ifsq_stage_insert()
               * and taken off by ifsq_stage_remove().
               */
  102         TAILQ_HEAD(, ifsubq_stage)      stg_head;
  103 } __cachealign;
  104 
  105 /*
  106  * System initialization
              *
              * Forward declarations of file-local routines.  if_attachdomain(),
              * ifinit() and ifnetinit() are registered as SYSINIT hooks in this
              * file.  if_rtdel() is the per-route callback handed to
              * rnh_walktree() by if_rtdel_dispatch(), and link_rtrequest() is
              * installed as the ifa_rtrequest hook of the link-level address
              * built in if_attach().
  107  */
  108 static void     if_attachdomain(void *);
  109 static void     if_attachdomain1(struct ifnet *);
  110 static int      ifconf(u_long, caddr_t, struct ucred *);
  111 static void     ifinit(void *);
  112 static void     ifnetinit(void *);
  113 static void     if_slowtimo(void *);
  114 static void     link_rtrequest(int, struct rtentry *);
  115 static int      if_rtdel(struct radix_node *, void *);
  116 
  117 /* Helper functions */
  118 static void     ifsq_watchdog_reset(struct ifsubq_watchdog *);
  119 
  120 #ifdef INET6
  121 /*
  122  * XXX: declared here to avoid including many inet6-related headers.
  123  * Should this be more generalized?
  124  */
  125 extern void     nd6_setmtu(struct ifnet *);
  126 #endif
  127 
  128 SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
  129 SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
  130 
      /*
       * Staging knob: ifsq_ifstart_schedule() only stages a subqueue on the
       * current CPU's staging list while this value is greater than zero.
       * Settable as a loader tunable and as a read/write sysctl.
       */
  131 static int ifsq_stage_cntmax = 4;
  132 TUNABLE_INT("net.link.stage_cntmax", &ifsq_stage_cntmax);
  133 SYSCTL_INT(_net_link, OID_AUTO, stage_cntmax, CTLFLAG_RW,
  134     &ifsq_stage_cntmax, 0, "ifq staging packet count max");
  135 
      /* Read/write sysctl flag; its consumers are outside this part of the file. */
  136 static int if_stats_compat = 0;
  137 SYSCTL_INT(_net_link, OID_AUTO, stats_compat, CTLFLAG_RW,
  138     &if_stats_compat, 0, "Compat the old ifnet stats");
  139 
      /* Boot-time hooks: ifinit() and ifnetinit(). */
  140 SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, ifinit, NULL)
  141 /* Must be after netisr_init */
  142 SYSINIT(ifnet, SI_SUB_PRE_DRIVERS, SI_ORDER_SECOND, ifnetinit, NULL)
  143 
      /* NOTE(review): presumably indexed by interface type (if_type) -- confirm. */
  144 static  if_com_alloc_t *if_com_alloc[256];
  145 static  if_com_free_t *if_com_free[256];
  146 
      /* kmalloc types defined by this file. */
  147 MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
  148 MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
  149 MALLOC_DEFINE(M_IFNET, "ifnet", "interface structure");
  150 
      /*
       * ifqmaxlen: send queue length applied when a driver leaves
       *            if_snd.altq_maxlen at zero (see ifinit() and if_attach()).
       * ifnet:     global list of attached interfaces; if_attach() appends to it.
       */
  151 int                     ifqmaxlen = IFQ_MAXLEN;
  152 struct ifnethead        ifnet = TAILQ_HEAD_INITIALIZER(ifnet);
  153 
      /* Callout initialized by ifinit() before it calls if_slowtimo(). */
  154 struct callout          if_slowtimo_timer;
  155 
      /*
       * if_index:      last interface index handed out; if_attach() pre-increments
       *                it, so index 0 is never assigned.
       * ifindex2ifnet: table mapping an interface index to its ifnet; allocated
       *                and doubled in size on demand by if_attach().
       * ifnet_threads: per-CPU thread array; not used in this part of the file.
       */
  156 int                     if_index = 0;
  157 struct ifnet            **ifindex2ifnet = NULL;
  158 static struct thread    ifnet_threads[MAXCPU];
  159 
      /* Per-CPU staging list heads, indexed with mycpuid. */
  160 static struct ifsubq_stage_head ifsubq_stage_heads[MAXCPU];
  161 
  162 #ifdef notyet
      /*
       * KTR trace-point definitions for ifq enqueue/dequeue ("ifq=%p") and for
       * the if_start scheduling paths ("ifp=%p").  The whole section is
       * compiled only when 'notyet' is defined.  logifq() and logifstart()
       * expand to KTR_LOG() on the matching trace point.
       */
  163 #define IFQ_KTR_STRING          "ifq=%p"
  164 #define IFQ_KTR_ARGS    struct ifaltq *ifq
  165 #ifndef KTR_IFQ
  166 #define KTR_IFQ                 KTR_ALL
  167 #endif
  168 KTR_INFO_MASTER(ifq);
  169 KTR_INFO(KTR_IFQ, ifq, enqueue, 0, IFQ_KTR_STRING, IFQ_KTR_ARGS);
  170 KTR_INFO(KTR_IFQ, ifq, dequeue, 1, IFQ_KTR_STRING, IFQ_KTR_ARGS);
  171 #define logifq(name, arg)       KTR_LOG(ifq_ ## name, arg)
  172 
  173 #define IF_START_KTR_STRING     "ifp=%p"
  174 #define IF_START_KTR_ARGS       struct ifnet *ifp
  175 #ifndef KTR_IF_START
  176 #define KTR_IF_START            KTR_ALL
  177 #endif
  178 KTR_INFO_MASTER(if_start);
  179 KTR_INFO(KTR_IF_START, if_start, run, 0,
  180          IF_START_KTR_STRING, IF_START_KTR_ARGS);
  181 KTR_INFO(KTR_IF_START, if_start, sched, 1,
  182          IF_START_KTR_STRING, IF_START_KTR_ARGS);
  183 KTR_INFO(KTR_IF_START, if_start, avoid, 2,
  184          IF_START_KTR_STRING, IF_START_KTR_ARGS);
  185 KTR_INFO(KTR_IF_START, if_start, contend_sched, 3,
  186          IF_START_KTR_STRING, IF_START_KTR_ARGS);
  187 KTR_INFO(KTR_IF_START, if_start, chase_sched, 4,
  188          IF_START_KTR_STRING, IF_START_KTR_ARGS);
  189 #define logifstart(name, arg)   KTR_LOG(if_start_ ## name, arg)
  190 #endif
  191 
  192 TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);      /* global list of ifg_group entries */
  193 
  194 /*
  195  * Network interface utility routines.
  196  *
  197  * Routines with ifa_ifwith* names take sockaddr *'s as
  198  * parameters.
  199  */
  200 /* ARGSUSED*/
  201 void
  202 ifinit(void *dummy)
  203 {
  204         struct ifnet *ifp;
  205 
  206         callout_init(&if_slowtimo_timer);
  207 
  208         crit_enter();
  209         TAILQ_FOREACH(ifp, &ifnet, if_link) {
  210                 if (ifp->if_snd.altq_maxlen == 0) {
  211                         if_printf(ifp, "XXX: driver didn't set altq_maxlen\n");
  212                         ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
  213                 }
  214         }
  215         crit_exit();
  216 
  217         if_slowtimo(0);
  218 }
  219 
  220 static void
  221 ifsq_ifstart_ipifunc(void *arg)
  222 {
  223         struct ifaltq_subque *ifsq = arg;
  224         struct lwkt_msg *lmsg = ifsq_get_ifstart_lmsg(ifsq, mycpuid);
  225 
  226         crit_enter();
  227         if (lmsg->ms_flags & MSGF_DONE)
  228                 lwkt_sendmsg_oncpu(netisr_cpuport(mycpuid), lmsg);
  229         crit_exit();
  230 }
  231 
  232 static __inline void
  233 ifsq_stage_remove(struct ifsubq_stage_head *head, struct ifsubq_stage *stage)
  234 {
  235         KKASSERT(stage->stg_flags & IFSQ_STAGE_FLAG_QUED);
  236         TAILQ_REMOVE(&head->stg_head, stage, stg_link);
  237         stage->stg_flags &= ~(IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED);
  238         stage->stg_cnt = 0;
  239         stage->stg_len = 0;
  240 }
  241 
  242 static __inline void
  243 ifsq_stage_insert(struct ifsubq_stage_head *head, struct ifsubq_stage *stage)
  244 {
  245         KKASSERT((stage->stg_flags &
  246             (IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED)) == 0);
  247         stage->stg_flags |= IFSQ_STAGE_FLAG_QUED;
  248         TAILQ_INSERT_TAIL(&head->stg_head, stage, stg_link);
  249 }
  250 
  251 /*
  252  * Schedule ifnet.if_start on the subqueue owner CPU
  253  */
  254 static void
  255 ifsq_ifstart_schedule(struct ifaltq_subque *ifsq, int force)
  256 {
  257         int cpu;
  258 
  259         if (!force && curthread->td_type == TD_TYPE_NETISR &&
  260             ifsq_stage_cntmax > 0) {
  261                 struct ifsubq_stage *stage = ifsq_get_stage(ifsq, mycpuid);
  262 
  263                 stage->stg_cnt = 0;
  264                 stage->stg_len = 0;
  265                 if ((stage->stg_flags & IFSQ_STAGE_FLAG_QUED) == 0)
  266                         ifsq_stage_insert(&ifsubq_stage_heads[mycpuid], stage);
  267                 stage->stg_flags |= IFSQ_STAGE_FLAG_SCHED;
  268                 return;
  269         }
  270 
  271         cpu = ifsq_get_cpuid(ifsq);
  272         if (cpu != mycpuid)
  273                 lwkt_send_ipiq(globaldata_find(cpu), ifsq_ifstart_ipifunc, ifsq);
  274         else
  275                 ifsq_ifstart_ipifunc(ifsq);
  276 }
  277 
  278 /*
  279  * NOTE:
  280  * This function will release ifnet.if_start subqueue interlock,
  281  * if ifnet.if_start for the subqueue does not need to be scheduled
  282  */
  283 static __inline int
  284 ifsq_ifstart_need_schedule(struct ifaltq_subque *ifsq, int running)
  285 {
  286         if (!running || ifsq_is_empty(ifsq)
  287 #ifdef ALTQ
  288             || ifsq->ifsq_altq->altq_tbr != NULL
  289 #endif
  290         ) {
  291                 ALTQ_SQ_LOCK(ifsq);
  292                 /*
  293                  * ifnet.if_start subqueue interlock is released, if:
  294                  * 1) Hardware can not take any packets, due to
  295                  *    o  interface is marked down
  296                  *    o  hardware queue is full (ifsq_is_oactive)
  297                  *    Under the second situation, hardware interrupt
  298                  *    or polling(4) will call/schedule ifnet.if_start
  299                  *    on the subqueue when hardware queue is ready
  300                  * 2) There is no packet in the subqueue.
  301                  *    Further ifq_dispatch or ifq_handoff will call/
  302                  *    schedule ifnet.if_start on the subqueue.
  303                  * 3) TBR is used and it does not allow further
  304                  *    dequeueing.
  305                  *    TBR callout will call ifnet.if_start on the
  306                  *    subqueue.
  307                  */
  308                 if (!running || !ifsq_data_ready(ifsq)) {
  309                         ifsq_clr_started(ifsq);
  310                         ALTQ_SQ_UNLOCK(ifsq);
  311                         return 0;
  312                 }
  313                 ALTQ_SQ_UNLOCK(ifsq);
  314         }
  315         return 1;
  316 }
  317 
  318 static void
  319 ifsq_ifstart_dispatch(netmsg_t msg)
  320 {
  321         struct lwkt_msg *lmsg = &msg->base.lmsg;
  322         struct ifaltq_subque *ifsq = lmsg->u.ms_resultp;
  323         struct ifnet *ifp = ifsq_get_ifp(ifsq);
  324         int running = 0, need_sched;
  325 
  326         crit_enter();
  327         lwkt_replymsg(lmsg, 0); /* reply ASAP */
  328         crit_exit();
  329 
  330         if (mycpuid != ifsq_get_cpuid(ifsq)) {
  331                 /*
  332                  * We need to chase the subqueue owner CPU change.
  333                  */
  334                 ifsq_ifstart_schedule(ifsq, 1);
  335                 return;
  336         }
  337 
  338         ifsq_serialize_hw(ifsq);
  339         if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) {
  340                 ifp->if_start(ifp, ifsq);
  341                 if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq))
  342                         running = 1;
  343         }
  344         need_sched = ifsq_ifstart_need_schedule(ifsq, running);
  345         ifsq_deserialize_hw(ifsq);
  346 
  347         if (need_sched) {
  348                 /*
  349                  * More data need to be transmitted, ifnet.if_start is
  350                  * scheduled on the subqueue owner CPU, and we keep going.
  351                  * NOTE: ifnet.if_start subqueue interlock is not released.
  352                  */
  353                 ifsq_ifstart_schedule(ifsq, 0);
  354         }
  355 }
  356 
  357 /* Device driver ifnet.if_start helper function */
  358 void
  359 ifsq_devstart(struct ifaltq_subque *ifsq)
  360 {
  361         struct ifnet *ifp = ifsq_get_ifp(ifsq);
  362         int running = 0;
  363 
  364         ASSERT_ALTQ_SQ_SERIALIZED_HW(ifsq);
  365 
  366         ALTQ_SQ_LOCK(ifsq);
  367         if (ifsq_is_started(ifsq) || !ifsq_data_ready(ifsq)) {
  368                 ALTQ_SQ_UNLOCK(ifsq);
  369                 return;
  370         }
  371         ifsq_set_started(ifsq);
  372         ALTQ_SQ_UNLOCK(ifsq);
  373 
  374         ifp->if_start(ifp, ifsq);
  375 
  376         if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq))
  377                 running = 1;
  378 
  379         if (ifsq_ifstart_need_schedule(ifsq, running)) {
  380                 /*
  381                  * More data need to be transmitted, ifnet.if_start is
  382                  * scheduled on ifnet's CPU, and we keep going.
  383                  * NOTE: ifnet.if_start interlock is not released.
  384                  */
  385                 ifsq_ifstart_schedule(ifsq, 0);
  386         }
  387 }
  388 
  389 void
  390 if_devstart(struct ifnet *ifp)
  391 {
  392         ifsq_devstart(ifq_get_subq_default(&ifp->if_snd));
  393 }
  394 
  395 /* Device driver ifnet.if_start schedule helper function */
  396 void
  397 ifsq_devstart_sched(struct ifaltq_subque *ifsq)
  398 {
  399         ifsq_ifstart_schedule(ifsq, 1);
  400 }
  401 
  402 void
  403 if_devstart_sched(struct ifnet *ifp)
  404 {
  405         ifsq_devstart_sched(ifq_get_subq_default(&ifp->if_snd));
  406 }
  407 
  408 static void
  409 if_default_serialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
  410 {
  411         lwkt_serialize_enter(ifp->if_serializer);
  412 }
  413 
  414 static void
  415 if_default_deserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
  416 {
  417         lwkt_serialize_exit(ifp->if_serializer);
  418 }
  419 
  420 static int
  421 if_default_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
  422 {
  423         return lwkt_serialize_try(ifp->if_serializer);
  424 }
  425 
  426 #ifdef INVARIANTS
  427 static void
  428 if_default_serialize_assert(struct ifnet *ifp,
  429                             enum ifnet_serialize slz __unused,
  430                             boolean_t serialized)
  431 {
  432         if (serialized)
  433                 ASSERT_SERIALIZED(ifp->if_serializer);
  434         else
  435                 ASSERT_NOT_SERIALIZED(ifp->if_serializer);
  436 }
  437 #endif
  438 
  439 /*
  440  * Attach an interface to the list of "active" interfaces.
  441  *
  442  * The serializer is optional.
  443  */
  444 void
  445 if_attach(struct ifnet *ifp, lwkt_serialize_t serializer)
  446 {
  447         unsigned socksize, ifasize;
  448         int namelen, masklen;
  449         struct sockaddr_dl *sdl;
  450         struct ifaddr *ifa;
  451         struct ifaltq *ifq;
  452         int i, q;
  453 
  454         static int if_indexlim = 8;
  455 
  456         if (ifp->if_serialize != NULL) {
  457                 KASSERT(ifp->if_deserialize != NULL &&
  458                         ifp->if_tryserialize != NULL &&
  459                         ifp->if_serialize_assert != NULL,
  460                         ("serialize functions are partially setup"));
  461 
  462                 /*
  463                  * If the device supplies serialize functions,
  464                  * then clear if_serializer to catch any invalid
  465                  * usage of this field.
  466                  */
  467                 KASSERT(serializer == NULL,
  468                         ("both serialize functions and default serializer "
  469                          "are supplied"));
  470                 ifp->if_serializer = NULL;
  471         } else {
  472                 KASSERT(ifp->if_deserialize == NULL &&
  473                         ifp->if_tryserialize == NULL &&
  474                         ifp->if_serialize_assert == NULL,
  475                         ("serialize functions are partially setup"));
  476                 ifp->if_serialize = if_default_serialize;
  477                 ifp->if_deserialize = if_default_deserialize;
  478                 ifp->if_tryserialize = if_default_tryserialize;
  479 #ifdef INVARIANTS
  480                 ifp->if_serialize_assert = if_default_serialize_assert;
  481 #endif
  482 
  483                 /*
  484                  * The serializer can be passed in from the device,
  485                  * allowing the same serializer to be used for both
  486                  * the interrupt interlock and the device queue.
  487                  * If not specified, the netif structure will use an
  488                  * embedded serializer.
  489                  */
  490                 if (serializer == NULL) {
  491                         serializer = &ifp->if_default_serializer;
  492                         lwkt_serialize_init(serializer);
  493                 }
  494                 ifp->if_serializer = serializer;
  495         }
  496 
  497         mtx_init(&ifp->if_ioctl_mtx);
  498         mtx_lock(&ifp->if_ioctl_mtx);
  499 
  500         lwkt_gettoken(&ifnet_token);    /* protect if_index and ifnet tailq */
  501         ifp->if_index = ++if_index;
  502 
  503         /*
  504          * XXX -
  505          * The old code would work if the interface passed a pre-existing
  506          * chain of ifaddrs to this code.  We don't trust our callers to
  507          * properly initialize the tailq, however, so we no longer allow
  508          * this unlikely case.
  509          */
  510         ifp->if_addrheads = kmalloc(ncpus * sizeof(struct ifaddrhead),
  511                                     M_IFADDR, M_WAITOK | M_ZERO);
  512         for (i = 0; i < ncpus; ++i)
  513                 TAILQ_INIT(&ifp->if_addrheads[i]);
  514 
  515         TAILQ_INIT(&ifp->if_prefixhead);
  516         TAILQ_INIT(&ifp->if_multiaddrs);
  517         TAILQ_INIT(&ifp->if_groups);
  518         getmicrotime(&ifp->if_lastchange);
  519         if (ifindex2ifnet == NULL || if_index >= if_indexlim) {
  520                 unsigned int n;
  521                 struct ifnet **q;
  522 
  523                 if_indexlim <<= 1;
  524 
  525                 /* grow ifindex2ifnet */
  526                 n = if_indexlim * sizeof(*q);
  527                 q = kmalloc(n, M_IFADDR, M_WAITOK | M_ZERO);
  528                 if (ifindex2ifnet) {
  529                         bcopy(ifindex2ifnet, q, n/2);
  530                         kfree(ifindex2ifnet, M_IFADDR);
  531                 }
  532                 ifindex2ifnet = q;
  533         }
  534 
  535         ifindex2ifnet[if_index] = ifp;
  536 
  537         /*
  538          * create a Link Level name for this device
  539          */
  540         namelen = strlen(ifp->if_xname);
  541         masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
  542         socksize = masklen + ifp->if_addrlen;
  543 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
  544         if (socksize < sizeof(*sdl))
  545                 socksize = sizeof(*sdl);
  546         socksize = ROUNDUP(socksize);
  547 #undef ROUNDUP
  548         ifasize = sizeof(struct ifaddr) + 2 * socksize;
  549         ifa = ifa_create(ifasize, M_WAITOK);
  550         sdl = (struct sockaddr_dl *)(ifa + 1);
  551         sdl->sdl_len = socksize;
  552         sdl->sdl_family = AF_LINK;
  553         bcopy(ifp->if_xname, sdl->sdl_data, namelen);
  554         sdl->sdl_nlen = namelen;
  555         sdl->sdl_index = ifp->if_index;
  556         sdl->sdl_type = ifp->if_type;
  557         ifp->if_lladdr = ifa;
  558         ifa->ifa_ifp = ifp;
  559         ifa->ifa_rtrequest = link_rtrequest;
  560         ifa->ifa_addr = (struct sockaddr *)sdl;
  561         sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
  562         ifa->ifa_netmask = (struct sockaddr *)sdl;
  563         sdl->sdl_len = masklen;
  564         while (namelen != 0)
  565                 sdl->sdl_data[--namelen] = 0xff;
  566         ifa_iflink(ifa, ifp, 0 /* Insert head */);
  567 
  568         ifp->if_data_pcpu = kmalloc_cachealign(
  569             ncpus * sizeof(struct ifdata_pcpu), M_DEVBUF, M_WAITOK | M_ZERO);
  570 
  571         if (ifp->if_mapsubq == NULL)
  572                 ifp->if_mapsubq = ifq_mapsubq_default;
  573 
  574         ifq = &ifp->if_snd;
  575         ifq->altq_type = 0;
  576         ifq->altq_disc = NULL;
  577         ifq->altq_flags &= ALTQF_CANTCHANGE;
  578         ifq->altq_tbr = NULL;
  579         ifq->altq_ifp = ifp;
  580 
  581         if (ifq->altq_subq_cnt <= 0)
  582                 ifq->altq_subq_cnt = 1;
  583         ifq->altq_subq = kmalloc_cachealign(
  584             ifq->altq_subq_cnt * sizeof(struct ifaltq_subque),
  585             M_DEVBUF, M_WAITOK | M_ZERO);
  586 
  587         if (ifq->altq_maxlen == 0) {
  588                 if_printf(ifp, "driver didn't set altq_maxlen\n");
  589                 ifq_set_maxlen(ifq, ifqmaxlen);
  590         }
  591 
  592         for (q = 0; q < ifq->altq_subq_cnt; ++q) {
  593                 struct ifaltq_subque *ifsq = &ifq->altq_subq[q];
  594 
  595                 ALTQ_SQ_LOCK_INIT(ifsq);
  596                 ifsq->ifsq_index = q;
  597 
  598                 ifsq->ifsq_altq = ifq;
  599                 ifsq->ifsq_ifp = ifp;
  600 
  601                 ifsq->ifsq_maxlen = ifq->altq_maxlen;
  602                 ifsq->ifsq_maxbcnt = ifsq->ifsq_maxlen * MCLBYTES;
  603                 ifsq->ifsq_prepended = NULL;
  604                 ifsq->ifsq_started = 0;
  605                 ifsq->ifsq_hw_oactive = 0;
  606                 ifsq_set_cpuid(ifsq, 0);
  607                 if (ifp->if_serializer != NULL)
  608                         ifsq_set_hw_serialize(ifsq, ifp->if_serializer);
  609 
  610                 ifsq->ifsq_stage =
  611                     kmalloc_cachealign(ncpus * sizeof(struct ifsubq_stage),
  612                     M_DEVBUF, M_WAITOK | M_ZERO);
  613                 for (i = 0; i < ncpus; ++i)
  614                         ifsq->ifsq_stage[i].stg_subq = ifsq;
  615 
  616                 ifsq->ifsq_ifstart_nmsg =
  617                     kmalloc(ncpus * sizeof(struct netmsg_base),
  618                     M_LWKTMSG, M_WAITOK);
  619                 for (i = 0; i < ncpus; ++i) {
  620                         netmsg_init(&ifsq->ifsq_ifstart_nmsg[i], NULL,
  621                             &netisr_adone_rport, 0, ifsq_ifstart_dispatch);
  622                         ifsq->ifsq_ifstart_nmsg[i].lmsg.u.ms_resultp = ifsq;
  623                 }
  624         }
  625         ifq_set_classic(ifq);
  626 
  627         if (!SLIST_EMPTY(&domains))
  628                 if_attachdomain1(ifp);
  629 
  630         TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
  631         lwkt_reltoken(&ifnet_token);
  632 
  633         /* Announce the interface. */
  634         EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
  635         devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);
  636         rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
  637 
  638         mtx_unlock(&ifp->if_ioctl_mtx);
  639 }
  640 
  641 static void
  642 if_attachdomain(void *dummy)
  643 {
  644         struct ifnet *ifp;
  645 
  646         crit_enter();
  647         TAILQ_FOREACH(ifp, &ifnet, if_list)
  648                 if_attachdomain1(ifp);
  649         crit_exit();
  650 }
  651 SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
  652         if_attachdomain, NULL);
  653 
  654 static void
  655 if_attachdomain1(struct ifnet *ifp)
  656 {
  657         struct domain *dp;
  658 
  659         crit_enter();
  660 
  661         /* address family dependent data region */
  662         bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
  663         SLIST_FOREACH(dp, &domains, dom_next)
  664                 if (dp->dom_ifattach)
  665                         ifp->if_afdata[dp->dom_family] =
  666                                 (*dp->dom_ifattach)(ifp);
  667         crit_exit();
  668 }
  669 
  670 /*
  671  * Purge all addresses whose type is _not_ AF_LINK
  672  */
  673 void
  674 if_purgeaddrs_nolink(struct ifnet *ifp)
  675 {
  676         struct ifaddr_container *ifac, *next;
  677 
  678         TAILQ_FOREACH_MUTABLE(ifac, &ifp->if_addrheads[mycpuid],
  679                               ifa_link, next) {
  680                 struct ifaddr *ifa = ifac->ifa;
  681 
  682                 /* Leave link ifaddr as it is */
  683                 if (ifa->ifa_addr->sa_family == AF_LINK)
  684                         continue;
  685 #ifdef INET
  686                 /* XXX: Ugly!! ad hoc just for INET */
  687                 if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
  688                         struct ifaliasreq ifr;
  689 #ifdef IFADDR_DEBUG_VERBOSE
  690                         int i;
  691 
  692                         kprintf("purge in4 addr %p: ", ifa);
  693                         for (i = 0; i < ncpus; ++i)
  694                                 kprintf("%d ", ifa->ifa_containers[i].ifa_refcnt);
  695                         kprintf("\n");
  696 #endif
  697 
  698                         bzero(&ifr, sizeof ifr);
  699                         ifr.ifra_addr = *ifa->ifa_addr;
  700                         if (ifa->ifa_dstaddr)
  701                                 ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
  702                         if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
  703                                        NULL) == 0)
  704                                 continue;
  705                 }
  706 #endif /* INET */
  707 #ifdef INET6
  708                 if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) {
  709 #ifdef IFADDR_DEBUG_VERBOSE
  710                         int i;
  711 
  712                         kprintf("purge in6 addr %p: ", ifa);
  713                         for (i = 0; i < ncpus; ++i)
  714                                 kprintf("%d ", ifa->ifa_containers[i].ifa_refcnt);
  715                         kprintf("\n");
  716 #endif
  717 
  718                         in6_purgeaddr(ifa);
  719                         /* ifp_addrhead is already updated */
  720                         continue;
  721                 }
  722 #endif /* INET6 */
  723                 ifa_ifunlink(ifa, ifp);
  724                 ifa_destroy(ifa);
  725         }
  726 }
  727 
  728 static void
  729 ifq_stage_detach_handler(netmsg_t nmsg)
  730 {
  731         struct ifaltq *ifq = nmsg->lmsg.u.ms_resultp;
  732         int q;
  733 
  734         for (q = 0; q < ifq->altq_subq_cnt; ++q) {
  735                 struct ifaltq_subque *ifsq = &ifq->altq_subq[q];
  736                 struct ifsubq_stage *stage = ifsq_get_stage(ifsq, mycpuid);
  737 
  738                 if (stage->stg_flags & IFSQ_STAGE_FLAG_QUED)
  739                         ifsq_stage_remove(&ifsubq_stage_heads[mycpuid], stage);
  740         }
  741         lwkt_replymsg(&nmsg->lmsg, 0);
  742 }
  743 
  744 static void
  745 ifq_stage_detach(struct ifaltq *ifq)
  746 {
  747         struct netmsg_base base;
  748         int cpu;
  749 
  750         netmsg_init(&base, NULL, &curthread->td_msgport, 0,
  751             ifq_stage_detach_handler);
  752         base.lmsg.u.ms_resultp = ifq;
  753 
  754         for (cpu = 0; cpu < ncpus; ++cpu)
  755                 lwkt_domsg(netisr_cpuport(cpu), &base.lmsg, 0);
  756 }
  757 
  758 struct netmsg_if_rtdel {
              /*
               * Message handled by if_rtdel_dispatch(), which walks the current
               * CPU's routing tables on behalf of 'ifp' and then forwards the
               * message to the next CPU.
               */
  759         struct netmsg_base      base;   /* netmsg header (first member) */
  760         struct ifnet            *ifp;   /* handed to if_rtdel() as the walktree argument */
  761 };
  762 
  763 static void
  764 if_rtdel_dispatch(netmsg_t msg)
  765 {
  766         struct netmsg_if_rtdel *rmsg = (void *)msg;
  767         int i, nextcpu, cpu;
  768 
  769         cpu = mycpuid;
  770         for (i = 1; i <= AF_MAX; i++) {
  771                 struct radix_node_head  *rnh;
  772 
  773                 if ((rnh = rt_tables[cpu][i]) == NULL)
  774                         continue;
  775                 rnh->rnh_walktree(rnh, if_rtdel, rmsg->ifp);
  776         }
  777 
  778         nextcpu = cpu + 1;
  779         if (nextcpu < ncpus)
  780                 lwkt_forwardmsg(netisr_cpuport(nextcpu), &rmsg->base.lmsg);
  781         else
  782                 lwkt_replymsg(&rmsg->base.lmsg, 0);
  783 }
  784 
  785 /*
  786  * Detach an interface, removing it from the
  787  * list of "active" interfaces.
  788  */
  789 void
  790 if_detach(struct ifnet *ifp)
  791 {
  792         struct netmsg_if_rtdel msg;
  793         struct domain *dp;
  794         int q;
  795 
  796         EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
  797 
  798         /*
  799          * Remove routes and flush queues.
  800          */
  801         crit_enter();
  802 #ifdef IFPOLL_ENABLE
  803         if (ifp->if_flags & IFF_NPOLLING)
  804                 ifpoll_deregister(ifp);
  805 #endif
  806         if_down(ifp);
  807 
  808 #ifdef ALTQ
  809         if (ifq_is_enabled(&ifp->if_snd))
  810                 altq_disable(&ifp->if_snd);
  811         if (ifq_is_attached(&ifp->if_snd))
  812                 altq_detach(&ifp->if_snd);
  813 #endif
  814 
  815         /*
  816          * Clean up all addresses.
  817          */
  818         ifp->if_lladdr = NULL;
  819 
  820         if_purgeaddrs_nolink(ifp);
  821         if (!TAILQ_EMPTY(&ifp->if_addrheads[mycpuid])) {
  822                 struct ifaddr *ifa;
  823 
  824                 ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
  825                 KASSERT(ifa->ifa_addr->sa_family == AF_LINK,
  826                         ("non-link ifaddr is left on if_addrheads"));
  827 
  828                 ifa_ifunlink(ifa, ifp);
  829                 ifa_destroy(ifa);
  830                 KASSERT(TAILQ_EMPTY(&ifp->if_addrheads[mycpuid]),
  831                         ("there are still ifaddrs left on if_addrheads"));
  832         }
  833 
  834 #ifdef INET
  835         /*
  836          * Remove all IPv4 kernel structures related to ifp.
  837          */
  838         in_ifdetach(ifp);
  839 #endif
  840 
  841 #ifdef INET6
  842         /*
  843          * Remove all IPv6 kernel structs related to ifp.  This should be done
  844          * before removing routing entries below, since IPv6 interface direct
  845          * routes are expected to be removed by the IPv6-specific kernel API.
  846          * Otherwise, the kernel will detect some inconsistency and bark it.
  847          */
  848         in6_ifdetach(ifp);
  849 #endif
  850 
  851         /*
  852          * Delete all remaining routes using this interface
  853          */
  854         netmsg_init(&msg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
  855             if_rtdel_dispatch);
  856         msg.ifp = ifp;
  857         rt_domsg_global(&msg.base);
  858 
  859         /* Announce that the interface is gone. */
  860         rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
  861         devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);
  862 
  863         SLIST_FOREACH(dp, &domains, dom_next)
  864                 if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
  865                         (*dp->dom_ifdetach)(ifp,
  866                                 ifp->if_afdata[dp->dom_family]);
  867 
  868         /*
  869          * Remove interface from ifindex2ifp[] and maybe decrement if_index.
  870          */
  871         lwkt_gettoken(&ifnet_token);
  872         ifindex2ifnet[ifp->if_index] = NULL;
  873         while (if_index > 0 && ifindex2ifnet[if_index] == NULL)
  874                 if_index--;
  875         TAILQ_REMOVE(&ifnet, ifp, if_link);
  876         lwkt_reltoken(&ifnet_token);
  877 
  878         kfree(ifp->if_addrheads, M_IFADDR);
  879 
  880         lwkt_synchronize_ipiqs("if_detach");
  881         ifq_stage_detach(&ifp->if_snd);
  882 
  883         for (q = 0; q < ifp->if_snd.altq_subq_cnt; ++q) {
  884                 struct ifaltq_subque *ifsq = &ifp->if_snd.altq_subq[q];
  885 
  886                 kfree(ifsq->ifsq_ifstart_nmsg, M_LWKTMSG);
  887                 kfree(ifsq->ifsq_stage, M_DEVBUF);
  888         }
  889         kfree(ifp->if_snd.altq_subq, M_DEVBUF);
  890 
  891         kfree(ifp->if_data_pcpu, M_DEVBUF);
  892 
  893         crit_exit();
  894 }
  895 
  896 /*
  897  * Create interface group without members
  898  */
  899 struct ifg_group *
  900 if_creategroup(const char *groupname)
  901 {
  902         struct ifg_group        *ifg = NULL;
  903 
  904         if ((ifg = (struct ifg_group *)kmalloc(sizeof(struct ifg_group),
  905             M_TEMP, M_NOWAIT)) == NULL)
  906                 return (NULL);
  907 
  908         strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
  909         ifg->ifg_refcnt = 0;
  910         ifg->ifg_carp_demoted = 0;
  911         TAILQ_INIT(&ifg->ifg_members);
  912 #if NPF > 0
  913         pfi_attach_ifgroup(ifg);
  914 #endif
  915         TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);
  916 
  917         return (ifg);
  918 }
  919 
  920 /*
  921  * Add a group to an interface
  922  */
  923 int
  924 if_addgroup(struct ifnet *ifp, const char *groupname)
  925 {
  926         struct ifg_list         *ifgl;
  927         struct ifg_group        *ifg = NULL;
  928         struct ifg_member       *ifgm;
  929 
  930         if (groupname[0] && groupname[strlen(groupname) - 1] >= '' &&
  931             groupname[strlen(groupname) - 1] <= '9')
  932                 return (EINVAL);
  933 
  934         TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
  935                 if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
  936                         return (EEXIST);
  937 
  938         if ((ifgl = kmalloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
  939                 return (ENOMEM);
  940 
  941         if ((ifgm = kmalloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
  942                 kfree(ifgl, M_TEMP);
  943                 return (ENOMEM);
  944         }
  945 
  946         TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
  947                 if (!strcmp(ifg->ifg_group, groupname))
  948                         break;
  949 
  950         if (ifg == NULL && (ifg = if_creategroup(groupname)) == NULL) {
  951                 kfree(ifgl, M_TEMP);
  952                 kfree(ifgm, M_TEMP);
  953                 return (ENOMEM);
  954         }
  955 
  956         ifg->ifg_refcnt++;
  957         ifgl->ifgl_group = ifg;
  958         ifgm->ifgm_ifp = ifp;
  959 
  960         TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
  961         TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
  962 
  963 #if NPF > 0
  964         pfi_group_change(groupname);
  965 #endif
  966 
  967         return (0);
  968 }
  969 
  970 /*
  971  * Remove a group from an interface
  972  */
  973 int
  974 if_delgroup(struct ifnet *ifp, const char *groupname)
  975 {
  976         struct ifg_list         *ifgl;
  977         struct ifg_member       *ifgm;
  978 
  979         TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
  980                 if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
  981                         break;
  982         if (ifgl == NULL)
  983                 return (ENOENT);
  984 
  985         TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
  986 
  987         TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
  988                 if (ifgm->ifgm_ifp == ifp)
  989                         break;
  990 
  991         if (ifgm != NULL) {
  992                 TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
  993                 kfree(ifgm, M_TEMP);
  994         }
  995 
  996         if (--ifgl->ifgl_group->ifg_refcnt == 0) {
  997                 TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
  998 #if NPF > 0
  999                 pfi_detach_ifgroup(ifgl->ifgl_group);
 1000 #endif
 1001                 kfree(ifgl->ifgl_group, M_TEMP);
 1002         }
 1003 
 1004         kfree(ifgl, M_TEMP);
 1005 
 1006 #if NPF > 0
 1007         pfi_group_change(groupname);
 1008 #endif
 1009 
 1010         return (0);
 1011 }
 1012 
 1013 /*
 1014  * Stores all groups from an interface in memory pointed
 1015  * to by data
 1016  */
 1017 int
 1018 if_getgroup(caddr_t data, struct ifnet *ifp)
 1019 {
 1020         int                      len, error;
 1021         struct ifg_list         *ifgl;
 1022         struct ifg_req           ifgrq, *ifgp;
 1023         struct ifgroupreq       *ifgr = (struct ifgroupreq *)data;
 1024 
 1025         if (ifgr->ifgr_len == 0) {
 1026                 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
 1027                         ifgr->ifgr_len += sizeof(struct ifg_req);
 1028                 return (0);
 1029         }
 1030 
 1031         len = ifgr->ifgr_len;
 1032         ifgp = ifgr->ifgr_groups;
 1033         TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
 1034                 if (len < sizeof(ifgrq))
 1035                         return (EINVAL);
 1036                 bzero(&ifgrq, sizeof ifgrq);
 1037                 strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
 1038                     sizeof(ifgrq.ifgrq_group));
 1039                 if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
 1040                     sizeof(struct ifg_req))))
 1041                         return (error);
 1042                 len -= sizeof(ifgrq);
 1043                 ifgp++;
 1044         }
 1045 
 1046         return (0);
 1047 }
 1048 
 1049 /*
 1050  * Stores all members of a group in memory pointed to by data
 1051  */
 1052 int
 1053 if_getgroupmembers(caddr_t data)
 1054 {
 1055         struct ifgroupreq       *ifgr = (struct ifgroupreq *)data;
 1056         struct ifg_group        *ifg;
 1057         struct ifg_member       *ifgm;
 1058         struct ifg_req           ifgrq, *ifgp;
 1059         int                      len, error;
 1060 
 1061         TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
 1062                 if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
 1063                         break;
 1064         if (ifg == NULL)
 1065                 return (ENOENT);
 1066 
 1067         if (ifgr->ifgr_len == 0) {
 1068                 TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
 1069                         ifgr->ifgr_len += sizeof(ifgrq);
 1070                 return (0);
 1071         }
 1072 
 1073         len = ifgr->ifgr_len;
 1074         ifgp = ifgr->ifgr_groups;
 1075         TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
 1076                 if (len < sizeof(ifgrq))
 1077                         return (EINVAL);
 1078                 bzero(&ifgrq, sizeof ifgrq);
 1079                 strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
 1080                     sizeof(ifgrq.ifgrq_member));
 1081                 if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
 1082                     sizeof(struct ifg_req))))
 1083                         return (error);
 1084                 len -= sizeof(ifgrq);
 1085                 ifgp++;
 1086         }
 1087 
 1088         return (0);
 1089 }
 1090 
 1091 /*
 1092  * Delete Routes for a Network Interface
 1093  *
 1094  * Called for each routing entry via the rnh->rnh_walktree() call above
 1095  * to delete all route entries referencing a detaching network interface.
 1096  *
 1097  * Arguments:
 1098  *      rn      pointer to node in the routing table
 1099  *      arg     argument passed to rnh->rnh_walktree() - detaching interface
 1100  *
 1101  * Returns:
 1102  *      0       successful
 1103  *      errno   failed - reason indicated
 1104  *
 1105  */
 1106 static int
 1107 if_rtdel(struct radix_node *rn, void *arg)
 1108 {
 1109         struct rtentry  *rt = (struct rtentry *)rn;
 1110         struct ifnet    *ifp = arg;
 1111         int             err;
 1112 
 1113         if (rt->rt_ifp == ifp) {
 1114 
 1115                 /*
 1116                  * Protect (sorta) against walktree recursion problems
 1117                  * with cloned routes
 1118                  */
 1119                 if (!(rt->rt_flags & RTF_UP))
 1120                         return (0);
 1121 
 1122                 err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
 1123                                 rt_mask(rt), rt->rt_flags,
 1124                                 NULL);
 1125                 if (err) {
 1126                         log(LOG_WARNING, "if_rtdel: error %d\n", err);
 1127                 }
 1128         }
 1129 
 1130         return (0);
 1131 }
 1132 
 1133 /*
 1134  * Locate an interface based on a complete address.
 1135  */
 1136 struct ifaddr *
 1137 ifa_ifwithaddr(struct sockaddr *addr)
 1138 {
 1139         struct ifnet *ifp;
 1140 
 1141         TAILQ_FOREACH(ifp, &ifnet, if_link) {
 1142                 struct ifaddr_container *ifac;
 1143 
 1144                 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
 1145                         struct ifaddr *ifa = ifac->ifa;
 1146 
 1147                         if (ifa->ifa_addr->sa_family != addr->sa_family)
 1148                                 continue;
 1149                         if (sa_equal(addr, ifa->ifa_addr))
 1150                                 return (ifa);
 1151                         if ((ifp->if_flags & IFF_BROADCAST) &&
 1152                             ifa->ifa_broadaddr &&
 1153                             /* IPv6 doesn't have broadcast */
 1154                             ifa->ifa_broadaddr->sa_len != 0 &&
 1155                             sa_equal(ifa->ifa_broadaddr, addr))
 1156                                 return (ifa);
 1157                 }
 1158         }
 1159         return (NULL);
 1160 }
 1161 /*
 1162  * Locate the point to point interface with a given destination address.
 1163  */
 1164 struct ifaddr *
 1165 ifa_ifwithdstaddr(struct sockaddr *addr)
 1166 {
 1167         struct ifnet *ifp;
 1168 
 1169         TAILQ_FOREACH(ifp, &ifnet, if_link) {
 1170                 struct ifaddr_container *ifac;
 1171 
 1172                 if (!(ifp->if_flags & IFF_POINTOPOINT))
 1173                         continue;
 1174 
 1175                 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
 1176                         struct ifaddr *ifa = ifac->ifa;
 1177 
 1178                         if (ifa->ifa_addr->sa_family != addr->sa_family)
 1179                                 continue;
 1180                         if (ifa->ifa_dstaddr &&
 1181                             sa_equal(addr, ifa->ifa_dstaddr))
 1182                                 return (ifa);
 1183                 }
 1184         }
 1185         return (NULL);
 1186 }
 1187 
 1188 /*
 1189  * Find an interface on a specific network.  If many, choice
 1190  * is most specific found.
 1191  */
 1192 struct ifaddr *
 1193 ifa_ifwithnet(struct sockaddr *addr)
 1194 {
 1195         struct ifnet *ifp;
 1196         struct ifaddr *ifa_maybe = NULL;
 1197         u_int af = addr->sa_family;
 1198         char *addr_data = addr->sa_data, *cplim;
 1199 
 1200         /*
 1201          * AF_LINK addresses can be looked up directly by their index number,
 1202          * so do that if we can.
 1203          */
 1204         if (af == AF_LINK) {
 1205                 struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
 1206 
 1207                 if (sdl->sdl_index && sdl->sdl_index <= if_index)
 1208                         return (ifindex2ifnet[sdl->sdl_index]->if_lladdr);
 1209         }
 1210 
 1211         /*
 1212          * Scan though each interface, looking for ones that have
 1213          * addresses in this address family.
 1214          */
 1215         TAILQ_FOREACH(ifp, &ifnet, if_link) {
 1216                 struct ifaddr_container *ifac;
 1217 
 1218                 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
 1219                         struct ifaddr *ifa = ifac->ifa;
 1220                         char *cp, *cp2, *cp3;
 1221 
 1222                         if (ifa->ifa_addr->sa_family != af)
 1223 next:                           continue;
 1224                         if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
 1225                                 /*
 1226                                  * This is a bit broken as it doesn't
 1227                                  * take into account that the remote end may
 1228                                  * be a single node in the network we are
 1229                                  * looking for.
 1230                                  * The trouble is that we don't know the
 1231                                  * netmask for the remote end.
 1232                                  */
 1233                                 if (ifa->ifa_dstaddr != NULL &&
 1234                                     sa_equal(addr, ifa->ifa_dstaddr))
 1235                                         return (ifa);
 1236                         } else {
 1237                                 /*
 1238                                  * if we have a special address handler,
 1239                                  * then use it instead of the generic one.
 1240                                  */
 1241                                 if (ifa->ifa_claim_addr) {
 1242                                         if ((*ifa->ifa_claim_addr)(ifa, addr)) {
 1243                                                 return (ifa);
 1244                                         } else {
 1245                                                 continue;
 1246                                         }
 1247                                 }
 1248 
 1249                                 /*
 1250                                  * Scan all the bits in the ifa's address.
 1251                                  * If a bit dissagrees with what we are
 1252                                  * looking for, mask it with the netmask
 1253                                  * to see if it really matters.
 1254                                  * (A byte at a time)
 1255                                  */
 1256                                 if (ifa->ifa_netmask == 0)
 1257                                         continue;
 1258                                 cp = addr_data;
 1259                                 cp2 = ifa->ifa_addr->sa_data;
 1260                                 cp3 = ifa->ifa_netmask->sa_data;
 1261                                 cplim = ifa->ifa_netmask->sa_len +
 1262                                         (char *)ifa->ifa_netmask;
 1263                                 while (cp3 < cplim)
 1264                                         if ((*cp++ ^ *cp2++) & *cp3++)
 1265                                                 goto next; /* next address! */
 1266                                 /*
 1267                                  * If the netmask of what we just found
 1268                                  * is more specific than what we had before
 1269                                  * (if we had one) then remember the new one
 1270                                  * before continuing to search
 1271                                  * for an even better one.
 1272                                  */
 1273                                 if (ifa_maybe == NULL ||
 1274                                     rn_refines((char *)ifa->ifa_netmask,
 1275                                                (char *)ifa_maybe->ifa_netmask))
 1276                                         ifa_maybe = ifa;
 1277                         }
 1278                 }
 1279         }
 1280         return (ifa_maybe);
 1281 }
 1282 
 1283 /*
 1284  * Find an interface address specific to an interface best matching
 1285  * a given address.
 1286  */
 1287 struct ifaddr *
 1288 ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
 1289 {
 1290         struct ifaddr_container *ifac;
 1291         char *cp, *cp2, *cp3;
 1292         char *cplim;
 1293         struct ifaddr *ifa_maybe = NULL;
 1294         u_int af = addr->sa_family;
 1295 
 1296         if (af >= AF_MAX)
 1297                 return (0);
 1298         TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
 1299                 struct ifaddr *ifa = ifac->ifa;
 1300 
 1301                 if (ifa->ifa_addr->sa_family != af)
 1302                         continue;
 1303                 if (ifa_maybe == NULL)
 1304                         ifa_maybe = ifa;
 1305                 if (ifa->ifa_netmask == NULL) {
 1306                         if (sa_equal(addr, ifa->ifa_addr) ||
 1307                             (ifa->ifa_dstaddr != NULL &&
 1308                              sa_equal(addr, ifa->ifa_dstaddr)))
 1309                                 return (ifa);
 1310                         continue;
 1311                 }
 1312                 if (ifp->if_flags & IFF_POINTOPOINT) {
 1313                         if (sa_equal(addr, ifa->ifa_dstaddr))
 1314                                 return (ifa);
 1315                 } else {
 1316                         cp = addr->sa_data;
 1317                         cp2 = ifa->ifa_addr->sa_data;
 1318                         cp3 = ifa->ifa_netmask->sa_data;
 1319                         cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
 1320                         for (; cp3 < cplim; cp3++)
 1321                                 if ((*cp++ ^ *cp2++) & *cp3)
 1322                                         break;
 1323                         if (cp3 == cplim)
 1324                                 return (ifa);
 1325                 }
 1326         }
 1327         return (ifa_maybe);
 1328 }
 1329 
 1330 /*
 1331  * Default action when installing a route with a Link Level gateway.
 1332  * Lookup an appropriate real ifa to point to.
 1333  * This should be moved to /sys/net/link.c eventually.
 1334  */
 1335 static void
 1336 link_rtrequest(int cmd, struct rtentry *rt)
 1337 {
 1338         struct ifaddr *ifa;
 1339         struct sockaddr *dst;
 1340         struct ifnet *ifp;
 1341 
 1342         if (cmd != RTM_ADD || (ifa = rt->rt_ifa) == NULL ||
 1343             (ifp = ifa->ifa_ifp) == NULL || (dst = rt_key(rt)) == NULL)
 1344                 return;
 1345         ifa = ifaof_ifpforaddr(dst, ifp);
 1346         if (ifa != NULL) {
 1347                 IFAFREE(rt->rt_ifa);
 1348                 IFAREF(ifa);
 1349                 rt->rt_ifa = ifa;
 1350                 if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
 1351                         ifa->ifa_rtrequest(cmd, rt);
 1352         }
 1353 }
 1354 
 1355 /*
 1356  * Mark an interface down and notify protocols of
 1357  * the transition.
 1358  * NOTE: must be called at splnet or eqivalent.
 1359  */
 1360 void
 1361 if_unroute(struct ifnet *ifp, int flag, int fam)
 1362 {
 1363         struct ifaddr_container *ifac;
 1364 
 1365         ifp->if_flags &= ~flag;
 1366         getmicrotime(&ifp->if_lastchange);
 1367         TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
 1368                 struct ifaddr *ifa = ifac->ifa;
 1369 
 1370                 if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
 1371                         kpfctlinput(PRC_IFDOWN, ifa->ifa_addr);
 1372         }
 1373         ifq_purge_all(&ifp->if_snd);
 1374         rt_ifmsg(ifp);
 1375 }
 1376 
 1377 /*
 1378  * Mark an interface up and notify protocols of
 1379  * the transition.
 1380  * NOTE: must be called at splnet or eqivalent.
 1381  */
 1382 void
 1383 if_route(struct ifnet *ifp, int flag, int fam)
 1384 {
 1385         struct ifaddr_container *ifac;
 1386 
 1387         ifq_purge_all(&ifp->if_snd);
 1388         ifp->if_flags |= flag;
 1389         getmicrotime(&ifp->if_lastchange);
 1390         TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
 1391                 struct ifaddr *ifa = ifac->ifa;
 1392 
 1393                 if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
 1394                         kpfctlinput(PRC_IFUP, ifa->ifa_addr);
 1395         }
 1396         rt_ifmsg(ifp);
 1397 #ifdef INET6
 1398         in6_if_up(ifp);
 1399 #endif
 1400 }
 1401 
 1402 /*
 1403  * Mark an interface down and notify protocols of the transition.  An
 1404  * interface going down is also considered to be a synchronizing event.
 1405  * We must ensure that all packet processing related to the interface
 1406  * has completed before we return so e.g. the caller can free the ifnet
 1407  * structure that the mbufs may be referencing.
 1408  *
 1409  * NOTE: must be called at splnet or eqivalent.
 1410  */
 1411 void
 1412 if_down(struct ifnet *ifp)
 1413 {
 1414         if_unroute(ifp, IFF_UP, AF_UNSPEC);
 1415         netmsg_service_sync();
 1416 }
 1417 
 1418 /*
 1419  * Mark an interface up and notify protocols of
 1420  * the transition.
 1421  * NOTE: must be called at splnet or eqivalent.
 1422  */
 1423 void
 1424 if_up(struct ifnet *ifp)
 1425 {
 1426         if_route(ifp, IFF_UP, AF_UNSPEC);
 1427 }
 1428 
 1429 /*
 1430  * Process a link state change.
 1431  * NOTE: must be called at splsoftnet or equivalent.
 1432  */
 1433 void
 1434 if_link_state_change(struct ifnet *ifp)
 1435 {
 1436         int link_state = ifp->if_link_state;
 1437 
 1438         rt_ifmsg(ifp);
 1439         devctl_notify("IFNET", ifp->if_xname,
 1440             (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL);
 1441 }
 1442 
 1443 /*
 1444  * Handle interface watchdog timer routines.  Called
 1445  * from softclock, we decrement timers (if set) and
 1446  * call the appropriate interface routine on expiration.
 1447  */
 1448 static void
 1449 if_slowtimo(void *arg)
 1450 {
 1451         struct ifnet *ifp;
 1452 
 1453         crit_enter();
 1454 
 1455         TAILQ_FOREACH(ifp, &ifnet, if_link) {
 1456                 if (if_stats_compat) {
 1457                         IFNET_STAT_GET(ifp, ipackets, ifp->if_ipackets);
 1458                         IFNET_STAT_GET(ifp, ierrors, ifp->if_ierrors);
 1459                         IFNET_STAT_GET(ifp, opackets, ifp->if_opackets);
 1460                         IFNET_STAT_GET(ifp, oerrors, ifp->if_oerrors);
 1461                         IFNET_STAT_GET(ifp, collisions, ifp->if_collisions);
 1462                         IFNET_STAT_GET(ifp, ibytes, ifp->if_ibytes);
 1463                         IFNET_STAT_GET(ifp, obytes, ifp->if_obytes);
 1464                         IFNET_STAT_GET(ifp, imcasts, ifp->if_imcasts);
 1465                         IFNET_STAT_GET(ifp, omcasts, ifp->if_omcasts);
 1466                         IFNET_STAT_GET(ifp, iqdrops, ifp->if_iqdrops);
 1467                         IFNET_STAT_GET(ifp, noproto, ifp->if_noproto);
 1468                 }
 1469 
 1470                 if (ifp->if_timer == 0 || --ifp->if_timer)
 1471                         continue;
 1472                 if (ifp->if_watchdog) {
 1473                         if (ifnet_tryserialize_all(ifp)) {
 1474                                 (*ifp->if_watchdog)(ifp);
 1475                                 ifnet_deserialize_all(ifp);
 1476                         } else {
 1477                                 /* try again next timeout */
 1478                                 ++ifp->if_timer;
 1479                         }
 1480                 }
 1481         }
 1482 
 1483         crit_exit();
 1484 
 1485         callout_reset(&if_slowtimo_timer, hz / IFNET_SLOWHZ, if_slowtimo, NULL);
 1486 }
 1487 
 1488 /*
 1489  * Map interface name to
 1490  * interface structure pointer.
 1491  */
 1492 struct ifnet *
 1493 ifunit(const char *name)
 1494 {
 1495         struct ifnet *ifp;
 1496 
 1497         /*
 1498          * Search all the interfaces for this name/number
 1499          */
 1500 
 1501         TAILQ_FOREACH(ifp, &ifnet, if_link) {
 1502                 if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0)
 1503                         break;
 1504         }
 1505         return (ifp);
 1506 }
 1507 
 1508 
 1509 /*
 1510  * Map interface name in a sockaddr_dl to
 1511  * interface structure pointer.
 1512  */
 1513 struct ifnet *
 1514 if_withname(struct sockaddr *sa)
 1515 {
 1516         char ifname[IFNAMSIZ+1];
 1517         struct sockaddr_dl *sdl = (struct sockaddr_dl *)sa;
 1518 
 1519         if ( (sa->sa_family != AF_LINK) || (sdl->sdl_nlen == 0) ||
 1520              (sdl->sdl_nlen > IFNAMSIZ) )
 1521                 return NULL;
 1522 
 1523         /*
 1524          * ifunit wants a null-terminated name.  It may not be null-terminated
 1525          * in the sockaddr.  We don't want to change the caller's sockaddr,
 1526          * and there might not be room to put the trailing null anyway, so we
 1527          * make a local copy that we know we can null terminate safely.
 1528          */
 1529 
 1530         bcopy(sdl->sdl_data, ifname, sdl->sdl_nlen);
 1531         ifname[sdl->sdl_nlen] = '\0';
 1532         return ifunit(ifname);
 1533 }
 1534 
 1535 
 1536 /*
 1537  * Interface ioctls.
 1538  */
 1539 int
 1540 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct ucred *cred)
 1541 {
 1542         struct ifnet *ifp;
 1543         struct ifreq *ifr;
 1544         struct ifstat *ifs;
 1545         int error;
 1546         short oif_flags;
 1547         int new_flags;
 1548 #ifdef COMPAT_43
 1549         int ocmd;
 1550 #endif
 1551         size_t namelen, onamelen;
 1552         char new_name[IFNAMSIZ];
 1553         struct ifaddr *ifa;
 1554         struct sockaddr_dl *sdl;
 1555 
 1556         switch (cmd) {
 1557         case SIOCGIFCONF:
 1558         case OSIOCGIFCONF:
 1559                 return (ifconf(cmd, data, cred));
 1560         default:
 1561                 break;
 1562         }
 1563 
 1564         ifr = (struct ifreq *)data;
 1565 
 1566         switch (cmd) {
 1567         case SIOCIFCREATE:
 1568         case SIOCIFCREATE2:
 1569                 if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
 1570                         return (error);
 1571                 return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name),
 1572                         cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL));
 1573         case SIOCIFDESTROY:
 1574                 if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
 1575                         return (error);
 1576                 return (if_clone_destroy(ifr->ifr_name));
 1577         case SIOCIFGCLONERS:
 1578                 return (if_clone_list((struct if_clonereq *)data));
 1579         default:
 1580                 break;
 1581         }
 1582 
 1583         /*
 1584          * Nominal ioctl through interface, lookup the ifp and obtain a
 1585          * lock to serialize the ifconfig ioctl operation.
 1586          */
 1587         ifp = ifunit(ifr->ifr_name);
 1588         if (ifp == NULL)
 1589                 return (ENXIO);
 1590         error = 0;
 1591         mtx_lock(&ifp->if_ioctl_mtx);
 1592 
 1593         switch (cmd) {
 1594         case SIOCGIFINDEX:
 1595                 ifr->ifr_index = ifp->if_index;
 1596                 break;
 1597 
 1598         case SIOCGIFFLAGS:
 1599                 ifr->ifr_flags = ifp->if_flags;
 1600                 ifr->ifr_flagshigh = ifp->if_flags >> 16;
 1601                 break;
 1602 
 1603         case SIOCGIFCAP:
 1604                 ifr->ifr_reqcap = ifp->if_capabilities;
 1605                 ifr->ifr_curcap = ifp->if_capenable;
 1606                 break;
 1607 
 1608         case SIOCGIFMETRIC:
 1609                 ifr->ifr_metric = ifp->if_metric;
 1610                 break;
 1611 
 1612         case SIOCGIFMTU:
 1613                 ifr->ifr_mtu = ifp->if_mtu;
 1614                 break;
 1615 
 1616         case SIOCGIFTSOLEN:
 1617                 ifr->ifr_tsolen = ifp->if_tsolen;
 1618                 break;
 1619 
 1620         case SIOCGIFDATA:
 1621                 error = copyout((caddr_t)&ifp->if_data, ifr->ifr_data,
 1622                                 sizeof(ifp->if_data));
 1623                 break;
 1624 
 1625         case SIOCGIFPHYS:
 1626                 ifr->ifr_phys = ifp->if_physical;
 1627                 break;
 1628 
 1629         case SIOCGIFPOLLCPU:
 1630                 ifr->ifr_pollcpu = -1;
 1631                 break;
 1632 
 1633         case SIOCSIFPOLLCPU:
 1634                 break;
 1635 
 1636         case SIOCSIFFLAGS:
 1637                 error = priv_check_cred(cred, PRIV_ROOT, 0);
 1638                 if (error)
 1639                         break;
 1640                 new_flags = (ifr->ifr_flags & 0xffff) |
 1641                     (ifr->ifr_flagshigh << 16);
 1642                 if (ifp->if_flags & IFF_SMART) {
 1643                         /* Smart drivers twiddle their own routes */
 1644                 } else if (ifp->if_flags & IFF_UP &&
 1645                     (new_flags & IFF_UP) == 0) {
 1646                         crit_enter();
 1647                         if_down(ifp);
 1648                         crit_exit();
 1649                 } else if (new_flags & IFF_UP &&
 1650                     (ifp->if_flags & IFF_UP) == 0) {
 1651                         crit_enter();
 1652                         if_up(ifp);
 1653                         crit_exit();
 1654                 }
 1655 
 1656 #ifdef IFPOLL_ENABLE
 1657                 if ((new_flags ^ ifp->if_flags) & IFF_NPOLLING) {
 1658                         if (new_flags & IFF_NPOLLING)
 1659                                 ifpoll_register(ifp);
 1660                         else
 1661                                 ifpoll_deregister(ifp);
 1662                 }
 1663 #endif
 1664 
 1665                 ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
 1666                         (new_flags &~ IFF_CANTCHANGE);
 1667                 if (new_flags & IFF_PPROMISC) {
 1668                         /* Permanently promiscuous mode requested */
 1669                         ifp->if_flags |= IFF_PROMISC;
 1670                 } else if (ifp->if_pcount == 0) {
 1671                         ifp->if_flags &= ~IFF_PROMISC;
 1672                 }
 1673                 if (ifp->if_ioctl) {
 1674                         ifnet_serialize_all(ifp);
 1675                         ifp->if_ioctl(ifp, cmd, data, cred);
 1676                         ifnet_deserialize_all(ifp);
 1677                 }
 1678                 getmicrotime(&ifp->if_lastchange);
 1679                 break;
 1680 
 1681         case SIOCSIFCAP:
 1682                 error = priv_check_cred(cred, PRIV_ROOT, 0);
 1683                 if (error)
 1684                         break;
 1685                 if (ifr->ifr_reqcap & ~ifp->if_capabilities) {
 1686                         error = EINVAL;
 1687                         break;
 1688                 }
 1689                 ifnet_serialize_all(ifp);
 1690                 ifp->if_ioctl(ifp, cmd, data, cred);
 1691                 ifnet_deserialize_all(ifp);
 1692                 break;
 1693 
 1694         case SIOCSIFNAME:
 1695                 error = priv_check_cred(cred, PRIV_ROOT, 0);
 1696                 if (error)
 1697                         break;
 1698                 error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
 1699                 if (error)
 1700                         break;
 1701                 if (new_name[0] == '\0') {
 1702                         error = EINVAL;
 1703                         break;
 1704                 }
 1705                 if (ifunit(new_name) != NULL) {
 1706                         error = EEXIST;
 1707                         break;
 1708                 }
 1709 
 1710                 EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
 1711 
 1712                 /* Announce the departure of the interface. */
 1713                 rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
 1714 
 1715                 strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
 1716                 ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
 1717                 /* XXX IFA_LOCK(ifa); */
 1718                 sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 1719                 namelen = strlen(new_name);
 1720                 onamelen = sdl->sdl_nlen;
 1721                 /*
 1722                  * Move the address if needed.  This is safe because we
 1723                  * allocate space for a name of length IFNAMSIZ when we
 1724                  * create this in if_attach().
 1725                  */
 1726                 if (namelen != onamelen) {
 1727                         bcopy(sdl->sdl_data + onamelen,
 1728                             sdl->sdl_data + namelen, sdl->sdl_alen);
 1729                 }
 1730                 bcopy(new_name, sdl->sdl_data, namelen);
 1731                 sdl->sdl_nlen = namelen;
 1732                 sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
 1733                 bzero(sdl->sdl_data, onamelen);
 1734                 while (namelen != 0)
 1735                         sdl->sdl_data[--namelen] = 0xff;
 1736                 /* XXX IFA_UNLOCK(ifa) */
 1737 
 1738                 EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
 1739 
 1740                 /* Announce the return of the interface. */
 1741                 rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
 1742                 break;
 1743 
 1744         case SIOCSIFMETRIC:
 1745                 error = priv_check_cred(cred, PRIV_ROOT, 0);
 1746                 if (error)
 1747                         break;
 1748                 ifp->if_metric = ifr->ifr_metric;
 1749                 getmicrotime(&ifp->if_lastchange);
 1750                 break;
 1751 
 1752         case SIOCSIFPHYS:
 1753                 error = priv_check_cred(cred, PRIV_ROOT, 0);
 1754                 if (error)
 1755                         break;
 1756                 if (ifp->if_ioctl == NULL) {
 1757                         error = EOPNOTSUPP;
 1758                         break;
 1759                 }
 1760                 ifnet_serialize_all(ifp);
 1761                 error = ifp->if_ioctl(ifp, cmd, data, cred);
 1762                 ifnet_deserialize_all(ifp);
 1763                 if (error == 0)
 1764                         getmicrotime(&ifp->if_lastchange);
 1765                 break;
 1766 
 1767         case SIOCSIFMTU:
 1768         {
 1769                 u_long oldmtu = ifp->if_mtu;
 1770 
 1771                 error = priv_check_cred(cred, PRIV_ROOT, 0);
 1772                 if (error)
 1773                         break;
 1774                 if (ifp->if_ioctl == NULL) {
 1775                         error = EOPNOTSUPP;
 1776                         break;
 1777                 }
 1778                 if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) {
 1779                         error = EINVAL;
 1780                         break;
 1781                 }
 1782                 ifnet_serialize_all(ifp);
 1783                 error = ifp->if_ioctl(ifp, cmd, data, cred);
 1784                 ifnet_deserialize_all(ifp);
 1785                 if (error == 0) {
 1786                         getmicrotime(&ifp->if_lastchange);
 1787                         rt_ifmsg(ifp);
 1788                 }
 1789                 /*
 1790                  * If the link MTU changed, do network layer specific procedure.
 1791                  */
 1792                 if (ifp->if_mtu != oldmtu) {
 1793 #ifdef INET6
 1794                         nd6_setmtu(ifp);
 1795 #endif
 1796                 }
 1797                 break;
 1798         }
 1799 
 1800         case SIOCSIFTSOLEN:
 1801                 error = priv_check_cred(cred, PRIV_ROOT, 0);
 1802                 if (error)
 1803                         break;
 1804 
 1805                 /* XXX need driver supplied upper limit */
 1806                 if (ifr->ifr_tsolen <= 0) {
 1807                         error = EINVAL;
 1808                         break;
 1809                 }
 1810                 ifp->if_tsolen = ifr->ifr_tsolen;
 1811                 break;
 1812 
 1813         case SIOCADDMULTI:
 1814         case SIOCDELMULTI:
 1815                 error = priv_check_cred(cred, PRIV_ROOT, 0);
 1816                 if (error)
 1817                         break;
 1818 
 1819                 /* Don't allow group membership on non-multicast interfaces. */
 1820                 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
 1821                         error = EOPNOTSUPP;
 1822                         break;
 1823                 }
 1824 
 1825                 /* Don't let users screw up protocols' entries. */
 1826                 if (ifr->ifr_addr.sa_family != AF_LINK) {
 1827                         error = EINVAL;
 1828                         break;
 1829                 }
 1830 
 1831                 if (cmd == SIOCADDMULTI) {
 1832                         struct ifmultiaddr *ifma;
 1833                         error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
 1834                 } else {
 1835                         error = if_delmulti(ifp, &ifr->ifr_addr);
 1836                 }
 1837                 if (error == 0)
 1838                         getmicrotime(&ifp->if_lastchange);
 1839                 break;
 1840 
 1841         case SIOCSIFPHYADDR:
 1842         case SIOCDIFPHYADDR:
 1843 #ifdef INET6
 1844         case SIOCSIFPHYADDR_IN6:
 1845 #endif
 1846         case SIOCSLIFPHYADDR:
 1847         case SIOCSIFMEDIA:
 1848         case SIOCSIFGENERIC:
 1849                 error = priv_check_cred(cred, PRIV_ROOT, 0);
 1850                 if (error)
 1851                         break;
 1852                 if (ifp->if_ioctl == 0) {
 1853                         error = EOPNOTSUPP;
 1854                         break;
 1855                 }
 1856                 ifnet_serialize_all(ifp);
 1857                 error = ifp->if_ioctl(ifp, cmd, data, cred);
 1858                 ifnet_deserialize_all(ifp);
 1859                 if (error == 0)
 1860                         getmicrotime(&ifp->if_lastchange);
 1861                 break;
 1862 
 1863         case SIOCGIFSTATUS:
 1864                 ifs = (struct ifstat *)data;
 1865                 ifs->ascii[0] = '\0';
 1866                 /* fall through */
 1867         case SIOCGIFPSRCADDR:
 1868         case SIOCGIFPDSTADDR:
 1869         case SIOCGLIFPHYADDR:
 1870         case SIOCGIFMEDIA:
 1871         case SIOCGIFGENERIC:
 1872                 if (ifp->if_ioctl == NULL) {
 1873                         error = EOPNOTSUPP;
 1874                         break;
 1875                 }
 1876                 ifnet_serialize_all(ifp);
 1877                 error = ifp->if_ioctl(ifp, cmd, data, cred);
 1878                 ifnet_deserialize_all(ifp);
 1879                 break;
 1880 
 1881         case SIOCSIFLLADDR:
 1882                 error = priv_check_cred(cred, PRIV_ROOT, 0);
 1883                 if (error)
 1884                         break;
 1885                 error = if_setlladdr(ifp, ifr->ifr_addr.sa_data,
 1886                                      ifr->ifr_addr.sa_len);
 1887                 EVENTHANDLER_INVOKE(iflladdr_event, ifp);
 1888                 break;
 1889 
 1890         default:
 1891                 oif_flags = ifp->if_flags;
 1892                 if (so->so_proto == 0) {
 1893                         error = EOPNOTSUPP;
 1894                         break;
 1895                 }
 1896 #ifndef COMPAT_43
 1897                 error = so_pru_control_direct(so, cmd, data, ifp);
 1898 #else
 1899                 ocmd = cmd;
 1900 
 1901                 switch (cmd) {
 1902                 case SIOCSIFDSTADDR:
 1903                 case SIOCSIFADDR:
 1904                 case SIOCSIFBRDADDR:
 1905                 case SIOCSIFNETMASK:
 1906 #if BYTE_ORDER != BIG_ENDIAN
 1907                         if (ifr->ifr_addr.sa_family == 0 &&
 1908                             ifr->ifr_addr.sa_len < 16) {
 1909                                 ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
 1910                                 ifr->ifr_addr.sa_len = 16;
 1911                         }
 1912 #else
 1913                         if (ifr->ifr_addr.sa_len == 0)
 1914                                 ifr->ifr_addr.sa_len = 16;
 1915 #endif
 1916                         break;
 1917                 case OSIOCGIFADDR:
 1918                         cmd = SIOCGIFADDR;
 1919                         break;
 1920                 case OSIOCGIFDSTADDR:
 1921                         cmd = SIOCGIFDSTADDR;
 1922                         break;
 1923                 case OSIOCGIFBRDADDR:
 1924                         cmd = SIOCGIFBRDADDR;
 1925                         break;
 1926                 case OSIOCGIFNETMASK:
 1927                         cmd = SIOCGIFNETMASK;
 1928                         break;
 1929                 default:
 1930                         break;
 1931                 }
 1932 
 1933                 error = so_pru_control_direct(so, cmd, data, ifp);
 1934 
 1935                 switch (ocmd) {
 1936                 case OSIOCGIFADDR:
 1937                 case OSIOCGIFDSTADDR:
 1938                 case OSIOCGIFBRDADDR:
 1939                 case OSIOCGIFNETMASK:
 1940                         *(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
 1941                         break;
 1942                 }
 1943 #endif /* COMPAT_43 */
 1944 
 1945                 if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
 1946 #ifdef INET6
 1947                         DELAY(100);/* XXX: temporary workaround for fxp issue*/
 1948                         if (ifp->if_flags & IFF_UP) {
 1949                                 crit_enter();
 1950                                 in6_if_up(ifp);
 1951                                 crit_exit();
 1952                         }
 1953 #endif
 1954                 }
 1955                 break;
 1956         }
 1957 
 1958         mtx_unlock(&ifp->if_ioctl_mtx);
 1959         return (error);
 1960 }
 1961 
 1962 /*
 1963  * Set/clear promiscuous mode on interface ifp based on the truth value
 1964  * of pswitch.  The calls are reference counted so that only the first
 1965  * "on" request actually has an effect, as does the final "off" request.
 1966  * Results are undefined if the "off" and "on" requests are not matched.
 1967  */
 1968 int
 1969 ifpromisc(struct ifnet *ifp, int pswitch)
 1970 {
 1971         struct ifreq ifr;
 1972         int error;
 1973         int oldflags;
 1974 
 1975         oldflags = ifp->if_flags;
 1976         if (ifp->if_flags & IFF_PPROMISC) {
 1977                 /* Do nothing if device is in permanently promiscuous mode */
 1978                 ifp->if_pcount += pswitch ? 1 : -1;
 1979                 return (0);
 1980         }
 1981         if (pswitch) {
 1982                 /*
 1983                  * If the device is not configured up, we cannot put it in
 1984                  * promiscuous mode.
 1985                  */
 1986                 if ((ifp->if_flags & IFF_UP) == 0)
 1987                         return (ENETDOWN);
 1988                 if (ifp->if_pcount++ != 0)
 1989                         return (0);
 1990                 ifp->if_flags |= IFF_PROMISC;
 1991                 log(LOG_INFO, "%s: promiscuous mode enabled\n",
 1992                     ifp->if_xname);
 1993         } else {
 1994                 if (--ifp->if_pcount > 0)
 1995                         return (0);
 1996                 ifp->if_flags &= ~IFF_PROMISC;
 1997                 log(LOG_INFO, "%s: promiscuous mode disabled\n",
 1998                     ifp->if_xname);
 1999         }
 2000         ifr.ifr_flags = ifp->if_flags;
 2001         ifr.ifr_flagshigh = ifp->if_flags >> 16;
 2002         ifnet_serialize_all(ifp);
 2003         error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, NULL);
 2004         ifnet_deserialize_all(ifp);
 2005         if (error == 0)
 2006                 rt_ifmsg(ifp);
 2007         else
 2008                 ifp->if_flags = oldflags;
 2009         return error;
 2010 }
 2011 
 2012 /*
 2013  * Return interface configuration
 2014  * of system.  List may be used
 2015  * in later ioctl's (above) to get
 2016  * other information.
 2017  */
 2018 static int
 2019 ifconf(u_long cmd, caddr_t data, struct ucred *cred)
 2020 {
 2021         struct ifconf *ifc = (struct ifconf *)data;
 2022         struct ifnet *ifp;
 2023         struct sockaddr *sa;
 2024         struct ifreq ifr, *ifrp;
 2025         int space = ifc->ifc_len, error = 0;
 2026 
 2027         ifrp = ifc->ifc_req;
 2028         TAILQ_FOREACH(ifp, &ifnet, if_link) {
 2029                 struct ifaddr_container *ifac;
 2030                 int addrs;
 2031 
 2032                 if (space <= sizeof ifr)
 2033                         break;
 2034 
 2035                 /*
 2036                  * Zero the stack declared structure first to prevent
 2037                  * memory disclosure.
 2038                  */
 2039                 bzero(&ifr, sizeof(ifr));
 2040                 if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
 2041                     >= sizeof(ifr.ifr_name)) {
 2042                         error = ENAMETOOLONG;
 2043                         break;
 2044                 }
 2045 
 2046                 addrs = 0;
 2047                 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
 2048                         struct ifaddr *ifa = ifac->ifa;
 2049 
 2050                         if (space <= sizeof ifr)
 2051                                 break;
 2052                         sa = ifa->ifa_addr;
 2053                         if (cred->cr_prison &&
 2054                             prison_if(cred, sa))
 2055                                 continue;
 2056                         addrs++;
 2057 #ifdef COMPAT_43
 2058                         if (cmd == OSIOCGIFCONF) {
 2059                                 struct osockaddr *osa =
 2060                                          (struct osockaddr *)&ifr.ifr_addr;
 2061                                 ifr.ifr_addr = *sa;
 2062                                 osa->sa_family = sa->sa_family;
 2063                                 error = copyout(&ifr, ifrp, sizeof ifr);
 2064                                 ifrp++;
 2065                         } else
 2066 #endif
 2067                         if (sa->sa_len <= sizeof(*sa)) {
 2068                                 ifr.ifr_addr = *sa;
 2069                                 error = copyout(&ifr, ifrp, sizeof ifr);
 2070                                 ifrp++;
 2071                         } else {
 2072                                 if (space < (sizeof ifr) + sa->sa_len -
 2073                                             sizeof(*sa))
 2074                                         break;
 2075                                 space -= sa->sa_len - sizeof(*sa);
 2076                                 error = copyout(&ifr, ifrp,
 2077                                                 sizeof ifr.ifr_name);
 2078                                 if (error == 0)
 2079                                         error = copyout(sa, &ifrp->ifr_addr,
 2080                                                         sa->sa_len);
 2081                                 ifrp = (struct ifreq *)
 2082                                         (sa->sa_len + (caddr_t)&ifrp->ifr_addr);
 2083                         }
 2084                         if (error)
 2085                                 break;
 2086                         space -= sizeof ifr;
 2087                 }
 2088                 if (error)
 2089                         break;
 2090                 if (!addrs) {
 2091                         bzero(&ifr.ifr_addr, sizeof ifr.ifr_addr);
 2092                         error = copyout(&ifr, ifrp, sizeof ifr);
 2093                         if (error)
 2094                                 break;
 2095                         space -= sizeof ifr;
 2096                         ifrp++;
 2097                 }
 2098         }
 2099         ifc->ifc_len -= space;
 2100         return (error);
 2101 }
 2102 
 2103 /*
 2104  * Just like if_promisc(), but for all-multicast-reception mode.
 2105  */
 2106 int
 2107 if_allmulti(struct ifnet *ifp, int onswitch)
 2108 {
 2109         int error = 0;
 2110         struct ifreq ifr;
 2111 
 2112         crit_enter();
 2113 
 2114         if (onswitch) {
 2115                 if (ifp->if_amcount++ == 0) {
 2116                         ifp->if_flags |= IFF_ALLMULTI;
 2117                         ifr.ifr_flags = ifp->if_flags;
 2118                         ifr.ifr_flagshigh = ifp->if_flags >> 16;
 2119                         ifnet_serialize_all(ifp);
 2120                         error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
 2121                                               NULL);
 2122                         ifnet_deserialize_all(ifp);
 2123                 }
 2124         } else {
 2125                 if (ifp->if_amcount > 1) {
 2126                         ifp->if_amcount--;
 2127                 } else {
 2128                         ifp->if_amcount = 0;
 2129                         ifp->if_flags &= ~IFF_ALLMULTI;
 2130                         ifr.ifr_flags = ifp->if_flags;
 2131                         ifr.ifr_flagshigh = ifp->if_flags >> 16;
 2132                         ifnet_serialize_all(ifp);
 2133                         error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
 2134                                               NULL);
 2135                         ifnet_deserialize_all(ifp);
 2136                 }
 2137         }
 2138 
 2139         crit_exit();
 2140 
 2141         if (error == 0)
 2142                 rt_ifmsg(ifp);
 2143         return error;
 2144 }
 2145 
 2146 /*
 2147  * Add a multicast listenership to the interface in question.
 2148  * The link layer provides a routine which converts
 2149  */
 2150 int
 2151 if_addmulti(
 2152         struct ifnet *ifp,      /* interface to manipulate */
 2153         struct sockaddr *sa,    /* address to add */
 2154         struct ifmultiaddr **retifma)
 2155 {
 2156         struct sockaddr *llsa, *dupsa;
 2157         int error;
 2158         struct ifmultiaddr *ifma;
 2159 
 2160         /*
 2161          * If the matching multicast address already exists
 2162          * then don't add a new one, just add a reference
 2163          */
 2164         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 2165                 if (sa_equal(sa, ifma->ifma_addr)) {
 2166                         ifma->ifma_refcount++;
 2167                         if (retifma)
 2168                                 *retifma = ifma;
 2169                         return 0;
 2170                 }
 2171         }
 2172 
 2173         /*
 2174          * Give the link layer a chance to accept/reject it, and also
 2175          * find out which AF_LINK address this maps to, if it isn't one
 2176          * already.
 2177          */
 2178         if (ifp->if_resolvemulti) {
 2179                 ifnet_serialize_all(ifp);
 2180                 error = ifp->if_resolvemulti(ifp, &llsa, sa);
 2181                 ifnet_deserialize_all(ifp);
 2182                 if (error) 
 2183                         return error;
 2184         } else {
 2185                 llsa = NULL;
 2186         }
 2187 
 2188         ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_WAITOK);
 2189         dupsa = kmalloc(sa->sa_len, M_IFMADDR, M_WAITOK);
 2190         bcopy(sa, dupsa, sa->sa_len);
 2191 
 2192         ifma->ifma_addr = dupsa;
 2193         ifma->ifma_lladdr = llsa;
 2194         ifma->ifma_ifp = ifp;
 2195         ifma->ifma_refcount = 1;
 2196         ifma->ifma_protospec = 0;
 2197         rt_newmaddrmsg(RTM_NEWMADDR, ifma);
 2198 
 2199         /*
 2200          * Some network interfaces can scan the address list at
 2201          * interrupt time; lock them out.
 2202          */
 2203         crit_enter();
 2204         TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
 2205         crit_exit();
 2206         if (retifma)
 2207                 *retifma = ifma;
 2208 
 2209         if (llsa != NULL) {
 2210                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 2211                         if (sa_equal(ifma->ifma_addr, llsa))
 2212                                 break;
 2213                 }
 2214                 if (ifma) {
 2215                         ifma->ifma_refcount++;
 2216                 } else {
 2217                         ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_WAITOK);
 2218                         dupsa = kmalloc(llsa->sa_len, M_IFMADDR, M_WAITOK);
 2219                         bcopy(llsa, dupsa, llsa->sa_len);
 2220                         ifma->ifma_addr = dupsa;
 2221                         ifma->ifma_ifp = ifp;
 2222                         ifma->ifma_refcount = 1;
 2223                         crit_enter();
 2224                         TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
 2225                         crit_exit();
 2226                 }
 2227         }
 2228         /*
 2229          * We are certain we have added something, so call down to the
 2230          * interface to let them know about it.
 2231          */
 2232         crit_enter();
 2233         ifnet_serialize_all(ifp);
 2234         if (ifp->if_ioctl)
 2235                 ifp->if_ioctl(ifp, SIOCADDMULTI, 0, NULL);
 2236         ifnet_deserialize_all(ifp);
 2237         crit_exit();
 2238 
 2239         return 0;
 2240 }
 2241 
 2242 /*
 2243  * Remove a reference to a multicast address on this interface.  Yell
 2244  * if the request does not match an existing membership.
 2245  */
 2246 int
 2247 if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
 2248 {
 2249         struct ifmultiaddr *ifma;
 2250 
 2251         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
 2252                 if (sa_equal(sa, ifma->ifma_addr))
 2253                         break;
 2254         if (ifma == NULL)
 2255                 return ENOENT;
 2256 
 2257         if (ifma->ifma_refcount > 1) {
 2258                 ifma->ifma_refcount--;
 2259                 return 0;
 2260         }
 2261 
 2262         rt_newmaddrmsg(RTM_DELMADDR, ifma);
 2263         sa = ifma->ifma_lladdr;
 2264         crit_enter();
 2265         TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
 2266         /*
 2267          * Make sure the interface driver is notified
 2268          * in the case of a link layer mcast group being left.
 2269          */
 2270         if (ifma->ifma_addr->sa_family == AF_LINK && sa == NULL) {
 2271                 ifnet_serialize_all(ifp);
 2272                 ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
 2273                 ifnet_deserialize_all(ifp);
 2274         }
 2275         crit_exit();
 2276         kfree(ifma->ifma_addr, M_IFMADDR);
 2277         kfree(ifma, M_IFMADDR);
 2278         if (sa == NULL)
 2279                 return 0;
 2280 
 2281         /*
 2282          * Now look for the link-layer address which corresponds to
 2283          * this network address.  It had been squirreled away in
 2284          * ifma->ifma_lladdr for this purpose (so we don't have
 2285          * to call ifp->if_resolvemulti() again), and we saved that
 2286          * value in sa above.  If some nasty deleted the
 2287          * link-layer address out from underneath us, we can deal because
 2288          * the address we stored was is not the same as the one which was
 2289          * in the record for the link-layer address.  (So we don't complain
 2290          * in that case.)
 2291          */
 2292         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
 2293                 if (sa_equal(sa, ifma->ifma_addr))
 2294                         break;
 2295         if (ifma == NULL)
 2296                 return 0;
 2297 
 2298         if (ifma->ifma_refcount > 1) {
 2299                 ifma->ifma_refcount--;
 2300                 return 0;
 2301         }
 2302 
 2303         crit_enter();
 2304         ifnet_serialize_all(ifp);
 2305         TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
 2306         ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
 2307         ifnet_deserialize_all(ifp);
 2308         crit_exit();
 2309         kfree(ifma->ifma_addr, M_IFMADDR);
 2310         kfree(sa, M_IFMADDR);
 2311         kfree(ifma, M_IFMADDR);
 2312 
 2313         return 0;
 2314 }
 2315 
 2316 /*
 2317  * Delete all multicast group membership for an interface.
 2318  * Should be used to quickly flush all multicast filters.
 2319  */
 2320 void
 2321 if_delallmulti(struct ifnet *ifp)
 2322 {
 2323         struct ifmultiaddr *ifma;
 2324         struct ifmultiaddr *next;
 2325 
 2326         TAILQ_FOREACH_MUTABLE(ifma, &ifp->if_multiaddrs, ifma_link, next)
 2327                 if_delmulti(ifp, ifma->ifma_addr);
 2328 }
 2329 
 2330 
 2331 /*
 2332  * Set the link layer address on an interface.
 2333  *
 2334  * At this time we only support certain types of interfaces,
 2335  * and we don't allow the length of the address to change.
 2336  */
 2337 int
 2338 if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
 2339 {
 2340         struct sockaddr_dl *sdl;
 2341         struct ifreq ifr;
 2342 
 2343         sdl = IF_LLSOCKADDR(ifp);
 2344         if (sdl == NULL)
 2345                 return (EINVAL);
 2346         if (len != sdl->sdl_alen)       /* don't allow length to change */
 2347                 return (EINVAL);
 2348         switch (ifp->if_type) {
 2349         case IFT_ETHER:                 /* these types use struct arpcom */
 2350         case IFT_XETHER:
 2351         case IFT_L2VLAN:
 2352                 bcopy(lladdr, ((struct arpcom *)ifp->if_softc)->ac_enaddr, len);
 2353                 bcopy(lladdr, LLADDR(sdl), len);
 2354                 break;
 2355         default:
 2356                 return (ENODEV);
 2357         }
 2358         /*
 2359          * If the interface is already up, we need
 2360          * to re-init it in order to reprogram its
 2361          * address filter.
 2362          */
 2363         ifnet_serialize_all(ifp);
 2364         if ((ifp->if_flags & IFF_UP) != 0) {
 2365 #ifdef INET
 2366                 struct ifaddr_container *ifac;
 2367 #endif
 2368 
 2369                 ifp->if_flags &= ~IFF_UP;
 2370                 ifr.ifr_flags = ifp->if_flags;
 2371                 ifr.ifr_flagshigh = ifp->if_flags >> 16;
 2372                 ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
 2373                               NULL);
 2374                 ifp->if_flags |= IFF_UP;
 2375                 ifr.ifr_flags = ifp->if_flags;
 2376                 ifr.ifr_flagshigh = ifp->if_flags >> 16;
 2377                 ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
 2378                                  NULL);
 2379 #ifdef INET
 2380                 /*
 2381                  * Also send gratuitous ARPs to notify other nodes about
 2382                  * the address change.
 2383                  */
 2384                 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
 2385                         struct ifaddr *ifa = ifac->ifa;
 2386 
 2387                         if (ifa->ifa_addr != NULL &&
 2388                             ifa->ifa_addr->sa_family == AF_INET)
 2389                                 arp_gratuitous(ifp, ifa);
 2390                 }
 2391 #endif
 2392         }
 2393         ifnet_deserialize_all(ifp);
 2394         return (0);
 2395 }
 2396 
 2397 struct ifmultiaddr *
 2398 ifmaof_ifpforaddr(struct sockaddr *sa, struct ifnet *ifp)
 2399 {
 2400         struct ifmultiaddr *ifma;
 2401 
 2402         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
 2403                 if (sa_equal(ifma->ifma_addr, sa))
 2404                         break;
 2405 
 2406         return ifma;
 2407 }
 2408 
 2409 /*
 2410  * This function locates the first real ethernet MAC from a network
 2411  * card and loads it into node, returning 0 on success or ENOENT if
 2412  * no suitable interfaces were found.  It is used by the uuid code to
 2413  * generate a unique 6-byte number.
 2414  */
 2415 int
 2416 if_getanyethermac(uint16_t *node, int minlen)
 2417 {
 2418         struct ifnet *ifp;
 2419         struct sockaddr_dl *sdl;
 2420 
 2421         TAILQ_FOREACH(ifp, &ifnet, if_link) {
 2422                 if (ifp->if_type != IFT_ETHER)
 2423                         continue;
 2424                 sdl = IF_LLSOCKADDR(ifp);
 2425                 if (sdl->sdl_alen < minlen)
 2426                         continue;
 2427                 bcopy(((struct arpcom *)ifp->if_softc)->ac_enaddr, node,
 2428                       minlen);
 2429                 return(0);
 2430         }
 2431         return (ENOENT);
 2432 }
 2433 
 2434 /*
 2435  * The name argument must be a pointer to storage which will last as
 2436  * long as the interface does.  For physical devices, the result of
 2437  * device_get_name(dev) is a good choice and for pseudo-devices a
 2438  * static string works well.
 2439  */
 2440 void
 2441 if_initname(struct ifnet *ifp, const char *name, int unit)
 2442 {
 2443         ifp->if_dname = name;
 2444         ifp->if_dunit = unit;
 2445         if (unit != IF_DUNIT_NONE)
 2446                 ksnprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
 2447         else
 2448                 strlcpy(ifp->if_xname, name, IFNAMSIZ);
 2449 }
 2450 
 2451 int
 2452 if_printf(struct ifnet *ifp, const char *fmt, ...)
 2453 {
 2454         __va_list ap;
 2455         int retval;
 2456 
 2457         retval = kprintf("%s: ", ifp->if_xname);
 2458         __va_start(ap, fmt);
 2459         retval += kvprintf(fmt, ap);
 2460         __va_end(ap);
 2461         return (retval);
 2462 }
 2463 
 2464 struct ifnet *
 2465 if_alloc(uint8_t type)
 2466 {
 2467         struct ifnet *ifp;
 2468         size_t size;
 2469 
 2470         /*
 2471          * XXX temporary hack until arpcom is setup in if_l2com
 2472          */
 2473         if (type == IFT_ETHER)
 2474                 size = sizeof(struct arpcom);
 2475         else
 2476                 size = sizeof(struct ifnet);
 2477 
 2478         ifp = kmalloc(size, M_IFNET, M_WAITOK|M_ZERO);
 2479 
 2480         ifp->if_type = type;
 2481 
 2482         if (if_com_alloc[type] != NULL) {
 2483                 ifp->if_l2com = if_com_alloc[type](type, ifp);
 2484                 if (ifp->if_l2com == NULL) {
 2485                         kfree(ifp, M_IFNET);
 2486                         return (NULL);
 2487                 }
 2488         }
 2489         return (ifp);
 2490 }
 2491 
 /*
  * Release the memory of ifp with kfree() under the M_IFNET malloc type.
  *
  * NOTE(review): if_alloc() may attach an if_l2com object obtained from
  * if_com_alloc[]; nothing here releases it.  Confirm that the owner
  * frees if_l2com before calling if_free().
  */
 2492 void
 2493 if_free(struct ifnet *ifp)
 2494 {
 2495         kfree(ifp, M_IFNET);
 2496 }
 2497 
 2498 void
 2499 ifq_set_classic(struct ifaltq *ifq)
 2500 {
 2501         ifq_set_methods(ifq, ifq->altq_ifp->if_mapsubq,
 2502             ifsq_classic_enqueue, ifsq_classic_dequeue, ifsq_classic_request);
 2503 }
 2504 
 2505 void
 2506 ifq_set_methods(struct ifaltq *ifq, altq_mapsubq_t mapsubq,
 2507     ifsq_enqueue_t enqueue, ifsq_dequeue_t dequeue, ifsq_request_t request)
 2508 {
 2509         int q;
 2510 
 2511         KASSERT(mapsubq != NULL, ("mapsubq is not specified"));
 2512         KASSERT(enqueue != NULL, ("enqueue is not specified"));
 2513         KASSERT(dequeue != NULL, ("dequeue is not specified"));
 2514         KASSERT(request != NULL, ("request is not specified"));
 2515 
 2516         ifq->altq_mapsubq = mapsubq;
 2517         for (q = 0; q < ifq->altq_subq_cnt; ++q) {
 2518                 struct ifaltq_subque *ifsq = &ifq->altq_subq[q];
 2519 
 2520                 ifsq->ifsq_enqueue = enqueue;
 2521                 ifsq->ifsq_dequeue = dequeue;
 2522                 ifsq->ifsq_request = request;
 2523         }
 2524 }
 2525 
 2526 static void
 2527 ifsq_norm_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m)
 2528 {
 2529         m->m_nextpkt = NULL;
 2530         if (ifsq->ifsq_norm_tail == NULL)
 2531                 ifsq->ifsq_norm_head = m;
 2532         else
 2533                 ifsq->ifsq_norm_tail->m_nextpkt = m;
 2534         ifsq->ifsq_norm_tail = m;
 2535         ALTQ_SQ_CNTR_INC(ifsq, m->m_pkthdr.len);
 2536 }
 2537 
 2538 static void
 2539 ifsq_prio_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m)
 2540 {
 2541         m->m_nextpkt = NULL;
 2542         if (ifsq->ifsq_prio_tail == NULL)
 2543                 ifsq->ifsq_prio_head = m;
 2544         else
 2545                 ifsq->ifsq_prio_tail->m_nextpkt = m;
 2546         ifsq->ifsq_prio_tail = m;
 2547         ALTQ_SQ_CNTR_INC(ifsq, m->m_pkthdr.len);
 2548         ALTQ_SQ_PRIO_CNTR_INC(ifsq, m->m_pkthdr.len);
 2549 }
 2550 
 2551 static struct mbuf *
 2552 ifsq_norm_dequeue(struct ifaltq_subque *ifsq)
 2553 {
 2554         struct mbuf *m;
 2555 
 2556         m = ifsq->ifsq_norm_head;
 2557         if (m != NULL) {
 2558                 if ((ifsq->ifsq_norm_head = m->m_nextpkt) == NULL)
 2559                         ifsq->ifsq_norm_tail = NULL;
 2560                 m->m_nextpkt = NULL;
 2561                 ALTQ_SQ_CNTR_DEC(ifsq, m->m_pkthdr.len);
 2562         }
 2563         return m;
 2564 }
 2565 
 2566 static struct mbuf *
 2567 ifsq_prio_dequeue(struct ifaltq_subque *ifsq)
 2568 {
 2569         struct mbuf *m;
 2570 
 2571         m = ifsq->ifsq_prio_head;
 2572         if (m != NULL) {
 2573                 if ((ifsq->ifsq_prio_head = m->m_nextpkt) == NULL)
 2574                         ifsq->ifsq_prio_tail = NULL;
 2575                 m->m_nextpkt = NULL;
 2576                 ALTQ_SQ_CNTR_DEC(ifsq, m->m_pkthdr.len);
 2577                 ALTQ_SQ_PRIO_CNTR_DEC(ifsq, m->m_pkthdr.len);
 2578         }
 2579         return m;
 2580 }
 2581 
 2582 int
 2583 ifsq_classic_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m,
 2584     struct altq_pktattr *pa __unused)
 2585 {
 2586         M_ASSERTPKTHDR(m);
 2587         if (ifsq->ifsq_len >= ifsq->ifsq_maxlen ||
 2588             ifsq->ifsq_bcnt >= ifsq->ifsq_maxbcnt) {
 2589                 if ((m->m_flags & M_PRIO) &&
 2590                     ifsq->ifsq_prio_len < (ifsq->ifsq_maxlen / 2) &&
 2591                     ifsq->ifsq_prio_bcnt < (ifsq->ifsq_maxbcnt / 2)) {
 2592                         struct mbuf *m_drop;
 2593 
 2594                         /*
 2595                          * Perform drop-head on normal queue
 2596                          */
 2597                         m_drop = ifsq_norm_dequeue(ifsq);
 2598                         if (m_drop != NULL) {
 2599                                 m_freem(m_drop);
 2600                                 ifsq_prio_enqueue(ifsq, m);
 2601                                 return 0;
 2602                         }
 2603                         /* XXX nothing could be dropped? */
 2604                 }
 2605                 m_freem(m);
 2606                 return ENOBUFS;
 2607         } else {
 2608                 if (m->m_flags & M_PRIO)
 2609                         ifsq_prio_enqueue(ifsq, m);
 2610                 else
 2611                         ifsq_norm_enqueue(ifsq, m);
 2612                 return 0;
 2613         }
 2614 }
 2615 
 2616 struct mbuf *
 2617 ifsq_classic_dequeue(struct ifaltq_subque *ifsq, int op)
 2618 {
 2619         struct mbuf *m;
 2620 
 2621         switch (op) {
 2622         case ALTDQ_POLL:
 2623                 m = ifsq->ifsq_prio_head;
 2624                 if (m == NULL)
 2625                         m = ifsq->ifsq_norm_head;
 2626                 break;
 2627 
 2628         case ALTDQ_REMOVE:
 2629                 m = ifsq_prio_dequeue(ifsq);
 2630                 if (m == NULL)
 2631                         m = ifsq_norm_dequeue(ifsq);
 2632                 break;
 2633 
 2634         default:
 2635                 panic("unsupported ALTQ dequeue op: %d", op);
 2636         }
 2637         return m;
 2638 }
 2639 
 2640 int
 2641 ifsq_classic_request(struct ifaltq_subque *ifsq, int req, void *arg)
 2642 {
 2643         switch (req) {
 2644         case ALTRQ_PURGE:
 2645                 for (;;) {
 2646                         struct mbuf *m;
 2647 
 2648                         m = ifsq_classic_dequeue(ifsq, ALTDQ_REMOVE);
 2649                         if (m == NULL)
 2650                                 break;
 2651                         m_freem(m);
 2652                 }
 2653                 break;
 2654 
 2655         default:
 2656                 panic("unsupported ALTQ request: %d", req);
 2657         }
 2658         return 0;
 2659 }
 2660 
 2661 static void
 2662 ifsq_ifstart_try(struct ifaltq_subque *ifsq, int force_sched)
 2663 {
 2664         struct ifnet *ifp = ifsq_get_ifp(ifsq);
 2665         int running = 0, need_sched;
 2666 
 2667         /*
 2668          * Try to do direct ifnet.if_start on the subqueue first, if there is
 2669          * contention on the subqueue hardware serializer, ifnet.if_start on
 2670          * the subqueue will be scheduled on the subqueue owner CPU.
 2671          */
 2672         if (!ifsq_tryserialize_hw(ifsq)) {
 2673                 /*
 2674                  * Subqueue hardware serializer contention happened,
 2675                  * ifnet.if_start on the subqueue is scheduled on
 2676                  * the subqueue owner CPU, and we keep going.
 2677                  */
 2678                 ifsq_ifstart_schedule(ifsq, 1);
 2679                 return;
 2680         }
 2681 
 2682         if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) {
 2683                 ifp->if_start(ifp, ifsq);
 2684                 if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq))
 2685                         running = 1;
 2686         }
 2687         need_sched = ifsq_ifstart_need_schedule(ifsq, running);
 2688 
 2689         ifsq_deserialize_hw(ifsq);
 2690 
 2691         if (need_sched) {
 2692                 /*
 2693                  * More data need to be transmitted, ifnet.if_start on the
 2694                  * subqueue is scheduled on the subqueue owner CPU, and we
 2695                  * keep going.
 2696                  * NOTE: ifnet.if_start subqueue interlock is not released.
 2697                  */
 2698                 ifsq_ifstart_schedule(ifsq, force_sched);
 2699         }
 2700 }
 2701 
 2702 /*
 2703  * Subqueue packets staging mechanism:
 2704  *
 2705  * The packets enqueued into the subqueue are staged to a certain amount
 2706  * before the ifnet.if_start on the subqueue is called.  In this way, the
 2707  * driver could avoid writing to hardware registers upon every packet,
 2708  * instead, hardware registers could be written when certain amount of
 2709  * packets are put onto hardware TX ring.  The measurement on several modern
 2710  * NICs (emx(4), igb(4), bnx(4), bge(4), jme(4)) shows that the hardware
 2711  * registers writing aggregation could save ~20% CPU time when 18bytes UDP
 2712  * datagrams are transmitted at 1.48Mpps.  The performance improvement by
 2713  * hardware registers writing aggregation is also mentioned by Luigi Rizzo's
 2714  * netmap paper (http://info.iet.unipi.it/~luigi/netmap/).
 2715  *
 2716  * Subqueue packets staging is performed for two entry points into drivers'
 2717  * transmission function:
 2718  * - Direct ifnet.if_start calling on the subqueue, i.e. ifsq_ifstart_try()
 2719  * - ifnet.if_start scheduling on the subqueue, i.e. ifsq_ifstart_schedule()
 2720  *
 2721  * Subqueue packets staging will be stopped upon any of the following
 2722  * conditions:
 2723  * - If the count of packets enqueued on the current CPU is greater than or
 2724  *   equal to ifsq_stage_cntmax. (XXX this should be per-interface)
 2725  * - If the total length of packets enqueued on the current CPU is greater
 2726  *   than or equal to the hardware's MTU - max_protohdr.  max_protohdr is
 2727  *   cut from the hardware's MTU mainly because a full TCP segment's size
 2728  *   is usually less than hardware's MTU.
 2729  * - ifsq_ifstart_schedule() is not pending on the current CPU and
 2730  *   ifnet.if_start subqueue interlock (ifaltq_subq.ifsq_started) is not
 2731  *   released.
 2732  * - The if_start_rollup(), which is registered as low priority netisr
 2733  *   rollup function, is called; probably because no more work is pending
 2734  *   for netisr.
 2735  *
 2736  * NOTE:
 2737  * Currently subqueue packet staging is only performed in netisr threads.
 2738  */
 /*
  * Enqueue m on the subqueue of ifp->if_snd that maps to the current CPU
  * and, unless the packet is being staged, kick off transmission.
  *
  * Returns the value produced by ifsq_enqueue_locked(): 0 when m was
  * queued, non-zero otherwise.  Output byte and multicast statistics are
  * only bumped when the enqueue succeeded.
  */
 2739 int
 2740 ifq_dispatch(struct ifnet *ifp, struct mbuf *m, struct altq_pktattr *pa)
 2741 {
 2742         struct ifaltq *ifq = &ifp->if_snd;
 2743         struct ifaltq_subque *ifsq;
 2744         int error, start = 0, len, mcast = 0, avoid_start = 0;
 2745         struct ifsubq_stage_head *head = NULL;
 2746         struct ifsubq_stage *stage = NULL;
 2747 
 2748         ifsq = ifq_map_subq(ifq, mycpuid);
 2749         ASSERT_ALTQ_SQ_NOT_SERIALIZED_HW(ifsq);
 2750 
         /*
          * Sample length and multicast flag now; m is handed to the
          * enqueue routine below and is not touched here afterwards.
          */
 2751         len = m->m_pkthdr.len;
 2752         if (m->m_flags & M_MCAST)
 2753                 mcast = 1;
 2754 
         /*
          * Staging is only attempted from netisr threads.  Account this
          * packet in the per-CPU stage of the subqueue; as long as both the
          * packet count and the byte count stay below their thresholds the
          * if_start call is deferred (avoid_start).
          */
 2755         if (curthread->td_type == TD_TYPE_NETISR) {
 2756                 head = &ifsubq_stage_heads[mycpuid];
 2757                 stage = ifsq_get_stage(ifsq, mycpuid);
 2758 
 2759                 stage->stg_cnt++;
 2760                 stage->stg_len += len;
 2761                 if (stage->stg_cnt < ifsq_stage_cntmax &&
 2762                     stage->stg_len < (ifp->if_mtu - max_protohdr))
 2763                         avoid_start = 1;
 2764         }
 2765 
 2766         ALTQ_SQ_LOCK(ifsq);
 2767         error = ifsq_enqueue_locked(ifsq, m, pa);
 2768         if (error) {
                 /*
                  * Enqueue failed.  With no data ready on the subqueue
                  * there is nothing to start; otherwise stop staging so
                  * the data already queued gets pushed out.
                  */
 2769                 if (!ifsq_data_ready(ifsq)) {
 2770                         ALTQ_SQ_UNLOCK(ifsq);
 2771                         return error;
 2772                 }
 2773                 avoid_start = 0;
 2774         }
 2775         if (!ifsq_is_started(ifsq)) {
 2776                 if (avoid_start) {
                         /*
                          * Staging: leave the packet on the subqueue and make
                          * sure the stage is linked on this CPU's stage list
                          * (unless already flagged as queued) for later
                          * processing.
                          */
 2777                         ALTQ_SQ_UNLOCK(ifsq);
 2778 
 2779                         KKASSERT(!error);
 2780                         if ((stage->stg_flags & IFSQ_STAGE_FLAG_QUED) == 0)
 2781                                 ifsq_stage_insert(head, stage);
 2782 
 2783                         IFNET_STAT_INC(ifp, obytes, len);
 2784                         if (mcast)
 2785                                 IFNET_STAT_INC(ifp, omcasts, 1);
 2786                         return error;
 2787                 }
 2788 
 2789                 /*
 2790                  * Hold the subqueue interlock of ifnet.if_start
 2791                  */
 2792                 ifsq_set_started(ifsq);
 2793                 start = 1;
 2794         }
 2795         ALTQ_SQ_UNLOCK(ifsq);
 2796 
 2797         if (!error) {
 2798                 IFNET_STAT_INC(ifp, obytes, len);
 2799                 if (mcast)
 2800                         IFNET_STAT_INC(ifp, omcasts, 1);
 2801         }
 2802 
 2803         if (stage != NULL) {
                 /*
                  * The interlock was already held (start == 0) and the
                  * stage carries the SCHED flag: keep staging while
                  * avoid_start is set, otherwise unlink the stage and
                  * force an if_start schedule.
                  */
 2804                 if (!start && (stage->stg_flags & IFSQ_STAGE_FLAG_SCHED)) {
 2805                         KKASSERT(stage->stg_flags & IFSQ_STAGE_FLAG_QUED);
 2806                         if (!avoid_start) {
 2807                                 ifsq_stage_remove(head, stage);
 2808                                 ifsq_ifstart_schedule(ifsq, 1);
 2809                         }
 2810                         return error;
 2811                 }
 2812 
                 /*
                  * Staging ends here: unlink the stage if it is on the
                  * stage list, otherwise just clear its counters.
                  */
 2813                 if (stage->stg_flags & IFSQ_STAGE_FLAG_QUED) {
 2814                         ifsq_stage_remove(head, stage);
 2815                 } else {
 2816                         stage->stg_cnt = 0;
 2817                         stage->stg_len = 0;
 2818                 }
 2819         }
 2820 
         /* Only the context that took the interlock above runs if_start. */
 2821         if (!start)
 2822                 return error;
 2823 
 2824         ifsq_ifstart_try(ifsq, 0);
 2825         return error;
 2826 }
 2827 
 2828 void *
 2829 ifa_create(int size, int flags)
 2830 {
 2831         struct ifaddr *ifa;
 2832         int i;
 2833 
 2834         KASSERT(size >= sizeof(*ifa), ("ifaddr size too small"));
 2835 
 2836         ifa = kmalloc(size, M_IFADDR, flags | M_ZERO);
 2837         if (ifa == NULL)
 2838                 return NULL;
 2839 
 2840         ifa->ifa_containers =
 2841             kmalloc_cachealign(ncpus * sizeof(struct ifaddr_container),
 2842                 M_IFADDR, M_WAITOK | M_ZERO);
 2843         ifa->ifa_ncnt = ncpus;
 2844         for (i = 0; i < ncpus; ++i) {
 2845                 struct ifaddr_container *ifac = &ifa->ifa_containers[i];
 2846 
 2847                 ifac->ifa_magic = IFA_CONTAINER_MAGIC;
 2848                 ifac->ifa = ifa;
 2849                 ifac->ifa_refcnt = 1;
 2850         }
 2851 #ifdef IFADDR_DEBUG
 2852         kprintf("alloc ifa %p %d\n", ifa, size);
 2853 #endif
 2854         return ifa;
 2855 }
 2856 
 2857 void
 2858 ifac_free(struct ifaddr_container *ifac, int cpu_id)
 2859 {
 2860         struct ifaddr *ifa = ifac->ifa;
 2861 
 2862         KKASSERT(ifac->ifa_magic == IFA_CONTAINER_MAGIC);
 2863         KKASSERT(ifac->ifa_refcnt == 0);
 2864         KASSERT(ifac->ifa_listmask == 0,
 2865                 ("ifa is still on %#x lists", ifac->ifa_listmask));
 2866 
 2867         ifac->ifa_magic = IFA_CONTAINER_DEAD;
 2868 
 2869 #ifdef IFADDR_DEBUG_VERBOSE
 2870         kprintf("try free ifa %p cpu_id %d\n", ifac->ifa, cpu_id);
 2871 #endif
 2872 
 2873         KASSERT(ifa->ifa_ncnt > 0 && ifa->ifa_ncnt <= ncpus,
 2874                 ("invalid # of ifac, %d", ifa->ifa_ncnt));
 2875         if (atomic_fetchadd_int(&ifa->ifa_ncnt, -1) == 1) {
 2876 #ifdef IFADDR_DEBUG
 2877                 kprintf("free ifa %p\n", ifa);
 2878 #endif
 2879                 kfree(ifa->ifa_containers, M_IFADDR);
 2880                 kfree(ifa, M_IFADDR);
 2881         }
 2882 }
 2883 
 2884 static void
 2885 ifa_iflink_dispatch(netmsg_t nmsg)
 2886 {
 2887         struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
 2888         struct ifaddr *ifa = msg->ifa;
 2889         struct ifnet *ifp = msg->ifp;
 2890         int cpu = mycpuid;
 2891         struct ifaddr_container *ifac;
 2892 
 2893         crit_enter();
 2894 
 2895         ifac = &ifa->ifa_containers[cpu];
 2896         ASSERT_IFAC_VALID(ifac);
 2897         KASSERT((ifac->ifa_listmask & IFA_LIST_IFADDRHEAD) == 0,
 2898                 ("ifaddr is on if_addrheads"));
 2899 
 2900         ifac->ifa_listmask |= IFA_LIST_IFADDRHEAD;
 2901         if (msg->tail)
 2902                 TAILQ_INSERT_TAIL(&ifp->if_addrheads[cpu], ifac, ifa_link);
 2903         else
 2904                 TAILQ_INSERT_HEAD(&ifp->if_addrheads[cpu], ifac, ifa_link);
 2905 
 2906         crit_exit();
 2907 
 2908         ifa_forwardmsg(&nmsg->lmsg, cpu + 1);
 2909 }
 2910 
 2911 void
 2912 ifa_iflink(struct ifaddr *ifa, struct ifnet *ifp, int tail)
 2913 {
 2914         struct netmsg_ifaddr msg;
 2915 
 2916         netmsg_init(&msg.base, NULL, &curthread->td_msgport,
 2917                     0, ifa_iflink_dispatch);
 2918         msg.ifa = ifa;
 2919         msg.ifp = ifp;
 2920         msg.tail = tail;
 2921 
 2922         ifa_domsg(&msg.base.lmsg, 0);
 2923 }
 2924 
 2925 static void
 2926 ifa_ifunlink_dispatch(netmsg_t nmsg)
 2927 {
 2928         struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
 2929         struct ifaddr *ifa = msg->ifa;
 2930         struct ifnet *ifp = msg->ifp;
 2931         int cpu = mycpuid;
 2932         struct ifaddr_container *ifac;
 2933 
 2934         crit_enter();
 2935 
 2936         ifac = &ifa->ifa_containers[cpu];
 2937         ASSERT_IFAC_VALID(ifac);
 2938         KASSERT(ifac->ifa_listmask & IFA_LIST_IFADDRHEAD,
 2939                 ("ifaddr is not on if_addrhead"));
 2940 
 2941         TAILQ_REMOVE(&ifp->if_addrheads[cpu], ifac, ifa_link);
 2942         ifac->ifa_listmask &= ~IFA_LIST_IFADDRHEAD;
 2943 
 2944         crit_exit();
 2945 
 2946         ifa_forwardmsg(&nmsg->lmsg, cpu + 1);
 2947 }
 2948 
 2949 void
 2950 ifa_ifunlink(struct ifaddr *ifa, struct ifnet *ifp)
 2951 {
 2952         struct netmsg_ifaddr msg;
 2953 
 2954         netmsg_init(&msg.base, NULL, &curthread->td_msgport,
 2955                     0, ifa_ifunlink_dispatch);
 2956         msg.ifa = ifa;
 2957         msg.ifp = ifp;
 2958 
 2959         ifa_domsg(&msg.base.lmsg, 0);
 2960 }
 2961 
 2962 static void
 2963 ifa_destroy_dispatch(netmsg_t nmsg)
 2964 {
 2965         struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
 2966 
 2967         IFAFREE(msg->ifa);
 2968         ifa_forwardmsg(&nmsg->lmsg, mycpuid + 1);
 2969 }
 2970 
 2971 void
 2972 ifa_destroy(struct ifaddr *ifa)
 2973 {
 2974         struct netmsg_ifaddr msg;
 2975 
 2976         netmsg_init(&msg.base, NULL, &curthread->td_msgport,
 2977                     0, ifa_destroy_dispatch);
 2978         msg.ifa = ifa;
 2979 
 2980         ifa_domsg(&msg.base.lmsg, 0);
 2981 }
 2982 
 2983 struct lwkt_port *
 2984 ifnet_portfn(int cpu)
 2985 {
 2986         return &ifnet_threads[cpu].td_msgport;
 2987 }
 2988 
 2989 void
 2990 ifnet_forwardmsg(struct lwkt_msg *lmsg, int next_cpu)
 2991 {
 2992         KKASSERT(next_cpu > mycpuid && next_cpu <= ncpus);
 2993 
 2994         if (next_cpu < ncpus)
 2995                 lwkt_forwardmsg(ifnet_portfn(next_cpu), lmsg);
 2996         else
 2997                 lwkt_replymsg(lmsg, 0);
 2998 }
 2999 
 3000 int
 3001 ifnet_domsg(struct lwkt_msg *lmsg, int cpu)
 3002 {
 3003         KKASSERT(cpu < ncpus);
 3004         return lwkt_domsg(ifnet_portfn(cpu), lmsg, 0);
 3005 }
 3006 
 3007 void
 3008 ifnet_sendmsg(struct lwkt_msg *lmsg, int cpu)
 3009 {
 3010         KKASSERT(cpu < ncpus);
 3011         lwkt_sendmsg(ifnet_portfn(cpu), lmsg);
 3012 }
 3013 
 3014 /*
 3015  * Generic netmsg service loop.  Some protocols may roll their own but all
 3016  * must do the basic command dispatch function call done here.
 3017  */
 3018 static void
 3019 ifnet_service_loop(void *arg __unused)
 3020 {
 3021         netmsg_t msg;
 3022 
 3023         while ((msg = lwkt_waitport(&curthread->td_msgport, 0))) {
 3024                 KASSERT(msg->base.nm_dispatch, ("ifnet_service: badmsg"));
 3025                 msg->base.nm_dispatch(msg);
 3026         }
 3027 }
 3028 
 3029 static void
 3030 if_start_rollup(void)
 3031 {
 3032         struct ifsubq_stage_head *head = &ifsubq_stage_heads[mycpuid];
 3033         struct ifsubq_stage *stage;
 3034 
 3035         while ((stage = TAILQ_FIRST(&head->stg_head)) != NULL) {
 3036                 struct ifaltq_subque *ifsq = stage->stg_subq;
 3037                 int is_sched = 0;
 3038 
 3039                 if (stage->stg_flags & IFSQ_STAGE_FLAG_SCHED)
 3040                         is_sched = 1;
 3041                 ifsq_stage_remove(head, stage);
 3042 
 3043                 if (is_sched) {
 3044                         ifsq_ifstart_schedule(ifsq, 1);
 3045                 } else {
 3046                         int start = 0;
 3047 
 3048                         ALTQ_SQ_LOCK(ifsq);
 3049                         if (!ifsq_is_started(ifsq)) {
 3050                                 /*
 3051                                  * Hold the subqueue interlock of
 3052                                  * ifnet.if_start
 3053                                  */
 3054                                 ifsq_set_started(ifsq);
 3055                                 start = 1;
 3056                         }
 3057                         ALTQ_SQ_UNLOCK(ifsq);
 3058 
 3059                         if (start)
 3060                                 ifsq_ifstart_try(ifsq, 1);
 3061                 }
 3062                 KKASSERT((stage->stg_flags &
 3063                     (IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED)) == 0);
 3064         }
 3065 }
 3066 
 3067 static void
 3068 ifnetinit(void *dummy __unused)
 3069 {
 3070         int i;
 3071 
 3072         for (i = 0; i < ncpus; ++i) {
 3073                 struct thread *thr = &ifnet_threads[i];
 3074 
 3075                 lwkt_create(ifnet_service_loop, NULL, NULL,
 3076                             thr, TDF_NOSTART|TDF_FORCE_SPINPORT|TDF_FIXEDCPU,
 3077                             i, "ifnet %d", i);
 3078                 netmsg_service_port_init(&thr->td_msgport);
 3079                 lwkt_schedule(thr);
 3080         }
 3081 
 3082         for (i = 0; i < ncpus; ++i)
 3083                 TAILQ_INIT(&ifsubq_stage_heads[i].stg_head);
 3084         netisr_register_rollup(if_start_rollup, NETISR_ROLLUP_PRIO_IFSTART);
 3085 }
 3086 
 3087 struct ifnet *
 3088 ifnet_byindex(unsigned short idx)
 3089 {
 3090         if (idx > if_index)
 3091                 return NULL;
 3092         return ifindex2ifnet[idx];
 3093 }
 3094 
 3095 struct ifaddr *
 3096 ifaddr_byindex(unsigned short idx)
 3097 {
 3098         struct ifnet *ifp;
 3099 
 3100         ifp = ifnet_byindex(idx);
 3101         if (!ifp)
 3102                 return NULL;
 3103         return TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
 3104 }
 3105 
 3106 void
 3107 if_register_com_alloc(u_char type,
 3108     if_com_alloc_t *a, if_com_free_t *f)
 3109 {
 3110 
 3111         KASSERT(if_com_alloc[type] == NULL,
 3112             ("if_register_com_alloc: %d already registered", type));
 3113         KASSERT(if_com_free[type] == NULL,
 3114             ("if_register_com_alloc: %d free already registered", type));
 3115 
 3116         if_com_alloc[type] = a;
 3117         if_com_free[type] = f;
 3118 }
 3119 
 3120 void
 3121 if_deregister_com_alloc(u_char type)
 3122 {
 3123 
 3124         KASSERT(if_com_alloc[type] != NULL,
 3125             ("if_deregister_com_alloc: %d not registered", type));
 3126         KASSERT(if_com_free[type] != NULL,
 3127             ("if_deregister_com_alloc: %d free not registered", type));
 3128         if_com_alloc[type] = NULL;
 3129         if_com_free[type] = NULL;
 3130 }
 3131 
 3132 int
 3133 if_ring_count2(int cnt, int cnt_max)
 3134 {
 3135         int shift = 0;
 3136 
 3137         KASSERT(cnt_max >= 1 && powerof2(cnt_max),
 3138             ("invalid ring count max %d", cnt_max));
 3139 
 3140         if (cnt <= 0)
 3141                 cnt = cnt_max;
 3142         if (cnt > ncpus2)
 3143                 cnt = ncpus2;
 3144         if (cnt > cnt_max)
 3145                 cnt = cnt_max;
 3146 
 3147         while ((1 << (shift + 1)) <= cnt)
 3148                 ++shift;
 3149         cnt = 1 << shift;
 3150 
 3151         KASSERT(cnt >= 1 && cnt <= ncpus2 && cnt <= cnt_max,
 3152             ("calculate cnt %d, ncpus2 %d, cnt max %d",
 3153              cnt, ncpus2, cnt_max));
 3154         return cnt;
 3155 }
 3156 
 /*
  * Set the maximum length of ifq to len plus ncpus * ifsq_stage_cntmax.
  *
  * NOTE(review): the extra room presumably covers packets staged on each
  * CPU by the subqueue staging mechanism -- confirm.
  */
 3157 void
 3158 ifq_set_maxlen(struct ifaltq *ifq, int len)
 3159 {
 3160         ifq->altq_maxlen = len + (ncpus * ifsq_stage_cntmax);
 3161 }
 3162 
 /*
  * Subqueue mapping function that ignores both arguments and always
  * returns ALTQ_SUBQ_INDEX_DEFAULT.
  */
 3163 int
 3164 ifq_mapsubq_default(struct ifaltq *ifq __unused, int cpuid __unused)
 3165 {
 3166         return ALTQ_SUBQ_INDEX_DEFAULT;
 3167 }
 3168 
 /*
  * Subqueue mapping function: returns cpuid masked with the queue's
  * altq_subq_mask.
  */
 3169 int
 3170 ifq_mapsubq_mask(struct ifaltq *ifq, int cpuid)
 3171 {
 3172         return (cpuid & ifq->altq_subq_mask);
 3173 }
 3174 
 3175 static void
 3176 ifsq_watchdog(void *arg)
 3177 {
 3178         struct ifsubq_watchdog *wd = arg;
 3179         struct ifnet *ifp;
 3180 
 3181         if (__predict_true(wd->wd_timer == 0 || --wd->wd_timer))
 3182                 goto done;
 3183 
 3184         ifp = ifsq_get_ifp(wd->wd_subq);
 3185         if (ifnet_tryserialize_all(ifp)) {
 3186                 wd->wd_watchdog(wd->wd_subq);
 3187                 ifnet_deserialize_all(ifp);
 3188         } else {
 3189                 /* try again next timeout */
 3190                 wd->wd_timer = 1;
 3191         }
 3192 done:
 3193         ifsq_watchdog_reset(wd);
 3194 }
 3195 
 /*
  * (Re)arm wd's callout so that ifsq_watchdog(wd) is invoked after hz
  * ticks, on the CPU reported by ifsq_get_cpuid() for the watched
  * subqueue.
  */
 3196 static void
 3197 ifsq_watchdog_reset(struct ifsubq_watchdog *wd)
 3198 {
 3199         callout_reset_bycpu(&wd->wd_callout, hz, ifsq_watchdog, wd,
 3200             ifsq_get_cpuid(wd->wd_subq));
 3201 }
 3202 
 3203 void
 3204 ifsq_watchdog_init(struct ifsubq_watchdog *wd, struct ifaltq_subque *ifsq,
 3205     ifsq_watchdog_t watchdog)
 3206 {
 3207         callout_init_mp(&wd->wd_callout);
 3208         wd->wd_timer = 0;
 3209         wd->wd_subq = ifsq;
 3210         wd->wd_watchdog = watchdog;
 3211 }
 3212 
 /*
  * Start the subqueue watchdog: clear the countdown and arm the callout
  * through ifsq_watchdog_reset().
  */
 3213 void
 3214 ifsq_watchdog_start(struct ifsubq_watchdog *wd)
 3215 {
 3216         wd->wd_timer = 0;
 3217         ifsq_watchdog_reset(wd);
 3218 }
 3219 
 /*
  * Stop the subqueue watchdog: clear the countdown and stop the callout.
  */
 3220 void
 3221 ifsq_watchdog_stop(struct ifsubq_watchdog *wd)
 3222 {
 3223         wd->wd_timer = 0;
 3224         callout_stop(&wd->wd_callout);
 3225 }

Cache object: 0b3ea36c5bf4cf35e601c5bee799aa8e


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.