FreeBSD/Linux Kernel Cross Reference
sys/netinet6/mld6.c


    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 2009 Bruce Simpson.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  * 3. The name of the author may not be used to endorse or promote
   15  *    products derived from this software without specific prior written
   16  *    permission.
   17  *
   18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   28  * SUCH DAMAGE.
   29  *
   30  *      $KAME: mld6.c,v 1.27 2001/04/04 05:17:30 itojun Exp $
   31  */
   32 
   33 /*-
   34  * Copyright (c) 1988 Stephen Deering.
   35  * Copyright (c) 1992, 1993
   36  *      The Regents of the University of California.  All rights reserved.
   37  *
   38  * This code is derived from software contributed to Berkeley by
   39  * Stephen Deering of Stanford University.
   40  *
   41  * Redistribution and use in source and binary forms, with or without
   42  * modification, are permitted provided that the following conditions
   43  * are met:
   44  * 1. Redistributions of source code must retain the above copyright
   45  *    notice, this list of conditions and the following disclaimer.
   46  * 2. Redistributions in binary form must reproduce the above copyright
   47  *    notice, this list of conditions and the following disclaimer in the
   48  *    documentation and/or other materials provided with the distribution.
   49  * 3. Neither the name of the University nor the names of its contributors
   50  *    may be used to endorse or promote products derived from this software
   51  *    without specific prior written permission.
   52  *
   53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   63  * SUCH DAMAGE.
   64  *
   65  *      @(#)igmp.c      8.1 (Berkeley) 7/19/93
   66  */
   67 
   68 #include <sys/cdefs.h>
   69 __FBSDID("$FreeBSD$");
   70 
   71 #include "opt_inet.h"
   72 #include "opt_inet6.h"
   73 
   74 #include <sys/param.h>
   75 #include <sys/systm.h>
   76 #include <sys/mbuf.h>
   77 #include <sys/socket.h>
   78 #include <sys/sysctl.h>
   79 #include <sys/kernel.h>
   80 #include <sys/callout.h>
   81 #include <sys/malloc.h>
   82 #include <sys/module.h>
   83 #include <sys/ktr.h>
   84 
   85 #include <net/if.h>
   86 #include <net/if_var.h>
   87 #include <net/route.h>
   88 #include <net/vnet.h>
   89 
   90 #include <netinet/in.h>
   91 #include <netinet/in_var.h>
   92 #include <netinet6/in6_var.h>
   93 #include <netinet/ip6.h>
   94 #include <netinet6/ip6_var.h>
   95 #include <netinet6/scope6_var.h>
   96 #include <netinet/icmp6.h>
   97 #include <netinet6/mld6.h>
   98 #include <netinet6/mld6_var.h>
   99 
  100 #include <security/mac/mac_framework.h>
  101 
  102 #ifndef KTR_MLD
  103 #define KTR_MLD KTR_INET6
  104 #endif
  105 
  106 static void     mli_delete_locked(struct ifnet *);
  107 static void     mld_dispatch_packet(struct mbuf *);
  108 static void     mld_dispatch_queue(struct mbufq *, int);
  109 static void     mld_final_leave(struct in6_multi *, struct mld_ifsoftc *);
  110 static void     mld_fasttimo_vnet(struct in6_multi_head *inmh);
  111 static int      mld_handle_state_change(struct in6_multi *,
  112                     struct mld_ifsoftc *);
  113 static int      mld_initial_join(struct in6_multi *, struct mld_ifsoftc *,
  114                     const int);
  115 #ifdef KTR
  116 static char *   mld_rec_type_to_str(const int);
  117 #endif
  118 static void     mld_set_version(struct mld_ifsoftc *, const int);
  119 static void     mld_slowtimo_vnet(void);
  120 static int      mld_v1_input_query(struct ifnet *, const struct ip6_hdr *,
  121                     /*const*/ struct mld_hdr *);
  122 static int      mld_v1_input_report(struct ifnet *, const struct ip6_hdr *,
  123                     /*const*/ struct mld_hdr *);
  124 static void     mld_v1_process_group_timer(struct in6_multi_head *,
  125                     struct in6_multi *);
  126 static void     mld_v1_process_querier_timers(struct mld_ifsoftc *);
  127 static int      mld_v1_transmit_report(struct in6_multi *, const int);
  128 static void     mld_v1_update_group(struct in6_multi *, const int);
  129 static void     mld_v2_cancel_link_timers(struct mld_ifsoftc *);
  130 static void     mld_v2_dispatch_general_query(struct mld_ifsoftc *);
  131 static struct mbuf *
  132                 mld_v2_encap_report(struct ifnet *, struct mbuf *);
  133 static int      mld_v2_enqueue_filter_change(struct mbufq *,
  134                     struct in6_multi *);
  135 static int      mld_v2_enqueue_group_record(struct mbufq *,
  136                     struct in6_multi *, const int, const int, const int,
  137                     const int);
  138 static int      mld_v2_input_query(struct ifnet *, const struct ip6_hdr *,
  139                     struct mbuf *, struct mldv2_query *, const int, const int);
  140 static int      mld_v2_merge_state_changes(struct in6_multi *,
  141                     struct mbufq *);
  142 static void     mld_v2_process_group_timers(struct in6_multi_head *,
  143                     struct mbufq *, struct mbufq *,
  144                     struct in6_multi *, const int);
  145 static int      mld_v2_process_group_query(struct in6_multi *,
  146                     struct mld_ifsoftc *mli, int, struct mbuf *,
  147                     struct mldv2_query *, const int);
  148 static int      sysctl_mld_gsr(SYSCTL_HANDLER_ARGS);
  149 static int      sysctl_mld_ifinfo(SYSCTL_HANDLER_ARGS);
  150 
  151 /*
  152  * Normative references: RFC 2710, RFC 3590, RFC 3810.
  153  *
  154  * Locking:
  155  *  * The MLD subsystem lock ends up being system-wide for the moment,
  156  *    but could be per-VIMAGE later on.
  157  *  * The permitted lock order is: IN6_MULTI_LOCK, MLD_LOCK, IF_ADDR_LOCK.
  158  *    Any may be taken independently; if any are held at the same
  159  *    time, the above lock order must be followed.
   160  *  * IN6_MULTI_LOCK covers in6_multi.
  161  *  * MLD_LOCK covers per-link state and any global variables in this file.
  162  *  * IF_ADDR_LOCK covers if_multiaddrs, which is used for a variety of
  163  *    per-link state iterators.
  164  *
  165  *  XXX LOR PREVENTION
  166  *  A special case for IPv6 is the in6_setscope() routine. ip6_output()
  167  *  will not accept an ifp; it wants an embedded scope ID, unlike
  168  *  ip_output(), which happily takes the ifp given to it. The embedded
  169  *  scope ID is only used by MLD to select the outgoing interface.
  170  *
  171  *  During interface attach and detach, MLD will take MLD_LOCK *after*
  172  *  the IF_AFDATA_LOCK.
  173  *  As in6_setscope() takes IF_AFDATA_LOCK then SCOPE_LOCK, we can't call
  174  *  it with MLD_LOCK held without triggering an LOR. A netisr with indirect
  175  *  dispatch could work around this, but we'd rather not do that, as it
  176  *  can introduce other races.
  177  *
  178  *  As such, we exploit the fact that the scope ID is just the interface
  179  *  index, and embed it in the IPv6 destination address accordingly.
  180  *  This is potentially NOT VALID for MLDv1 reports, as they
  181  *  are always sent to the multicast group itself; as MLDv2
  182  *  reports are always sent to ff02::16, this is not an issue
  183  *  when MLDv2 is in use.
  184  *
  185  *  This does not however eliminate the LOR when ip6_output() itself
  186  *  calls in6_setscope() internally whilst MLD_LOCK is held. This will
  187  *  trigger a LOR warning in WITNESS when the ifnet is detached.
  188  *
  189  *  The right answer is probably to make IF_AFDATA_LOCK an rwlock, given
  190  *  how it's used across the network stack. Here we're simply exploiting
  191  *  the fact that MLD runs at a similar layer in the stack to scope6.c.
  192  *
  193  * VIMAGE:
  194  *  * Each in6_multi corresponds to an ifp, and each ifp corresponds
  195  *    to a vnet in ifp->if_vnet.
  196  */
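/*
 * A minimal sketch of the permitted lock order documented above, using
 * the lock names as they appear later in this file; any routine that
 * needs several of these locks acquires them top-down:
 *
 *      IN6_MULTI_LIST_LOCK();            (held by the caller in mld_ifdetach())
 *      MLD_LOCK();
 *      IF_ADDR_WLOCK(ifp);
 *      ... walk ifp->if_multiaddrs ...
 *      IF_ADDR_WUNLOCK(ifp);
 *      MLD_UNLOCK();
 *      IN6_MULTI_LIST_UNLOCK();
 *
 * mld_ifdetach() and mld_fasttimo_vnet() below follow this pattern.
 */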
  197 static struct mtx                mld_mtx;
  198 static MALLOC_DEFINE(M_MLD, "mld", "mld state");
  199 
  200 #define MLD_EMBEDSCOPE(pin6, zoneid)                                    \
  201         if (IN6_IS_SCOPE_LINKLOCAL(pin6) ||                             \
  202             IN6_IS_ADDR_MC_INTFACELOCAL(pin6))                          \
  203                 (pin6)->s6_addr16[1] = htons((zoneid) & 0xFFFF)         \
  204 
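/*
 * Example of the embedding performed by MLD_EMBEDSCOPE(): for the
 * link-scope group ff02::16 and a zone (interface index) of 3, the
 * macro stores htons(3) in s6_addr16[1], giving the KAME-internal form
 * ff02:3::16.  in6_clearscope() strips this again before addresses are
 * handed back to userland, as the XXX comments below note.
 */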
  205 /*
  206  * VIMAGE-wide globals.
  207  */
  208 VNET_DEFINE_STATIC(struct timeval, mld_gsrdelay) = {10, 0};
  209 VNET_DEFINE_STATIC(LIST_HEAD(, mld_ifsoftc), mli_head);
  210 VNET_DEFINE_STATIC(int, interface_timers_running6);
  211 VNET_DEFINE_STATIC(int, state_change_timers_running6);
  212 VNET_DEFINE_STATIC(int, current_state_timers_running6);
  213 
  214 #define V_mld_gsrdelay                  VNET(mld_gsrdelay)
  215 #define V_mli_head                      VNET(mli_head)
  216 #define V_interface_timers_running6     VNET(interface_timers_running6)
  217 #define V_state_change_timers_running6  VNET(state_change_timers_running6)
  218 #define V_current_state_timers_running6 VNET(current_state_timers_running6)
  219 
  220 SYSCTL_DECL(_net_inet6);        /* Note: Not in any common header. */
  221 
  222 SYSCTL_NODE(_net_inet6, OID_AUTO, mld, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
  223     "IPv6 Multicast Listener Discovery");
  224 
  225 /*
  226  * Virtualized sysctls.
  227  */
  228 SYSCTL_PROC(_net_inet6_mld, OID_AUTO, gsrdelay,
  229     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
  230     &VNET_NAME(mld_gsrdelay.tv_sec), 0, sysctl_mld_gsr, "I",
  231     "Rate limit for MLDv2 Group-and-Source queries in seconds");
  232 
  233 /*
  234  * Non-virtualized sysctls.
  235  */
  236 static SYSCTL_NODE(_net_inet6_mld, OID_AUTO, ifinfo,
  237     CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_mld_ifinfo,
  238     "Per-interface MLDv2 state");
  239 
  240 static int      mld_v1enable = 1;
  241 SYSCTL_INT(_net_inet6_mld, OID_AUTO, v1enable, CTLFLAG_RWTUN,
  242     &mld_v1enable, 0, "Enable fallback to MLDv1");
  243 
  244 static int      mld_v2enable = 1;
  245 SYSCTL_INT(_net_inet6_mld, OID_AUTO, v2enable, CTLFLAG_RWTUN,
  246     &mld_v2enable, 0, "Enable MLDv2");
  247 
  248 static int      mld_use_allow = 1;
  249 SYSCTL_INT(_net_inet6_mld, OID_AUTO, use_allow, CTLFLAG_RWTUN,
  250     &mld_use_allow, 0, "Use ALLOW/BLOCK for RFC 4604 SSM joins/leaves");
  251 
  252 /*
  253  * Packed Router Alert option structure declaration.
  254  */
  255 struct mld_raopt {
  256         struct ip6_hbh          hbh;
  257         struct ip6_opt          pad;
  258         struct ip6_opt_router   ra;
  259 } __packed;
  260 
  261 /*
  262  * Router Alert hop-by-hop option header.
  263  */
  264 static struct mld_raopt mld_ra = {
  265         .hbh = { 0, 0 },
  266         .pad = { .ip6o_type = IP6OPT_PADN, 0 },
  267         .ra = {
  268             .ip6or_type = IP6OPT_ROUTER_ALERT,
  269             .ip6or_len = IP6OPT_RTALERT_LEN - 2,
  270             .ip6or_value[0] = ((IP6OPT_RTALERT_MLD >> 8) & 0xFF),
  271             .ip6or_value[1] = (IP6OPT_RTALERT_MLD & 0xFF)
  272         }
  273 };
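/*
 * Wire-format note: the ra member above encodes the Router Alert
 * option as the bytes 05 02 00 00, i.e. type IP6OPT_ROUTER_ALERT
 * (0x05), length 2, and a 16-bit value of 0, which RFC 2711 assigns to
 * MLD messages.  Together with the hop-by-hop header and the PadN
 * option, the __packed structure occupies a single 8-byte extension
 * header.
 */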
  274 static struct ip6_pktopts mld_po;
  275 
  276 static __inline void
  277 mld_save_context(struct mbuf *m, struct ifnet *ifp)
  278 {
  279 
  280 #ifdef VIMAGE
  281         m->m_pkthdr.PH_loc.ptr = ifp->if_vnet;
  282 #endif /* VIMAGE */
  283         m->m_pkthdr.rcvif = ifp;
  284         m->m_pkthdr.flowid = ifp->if_index;
  285 }
  286 
  287 static __inline void
  288 mld_scrub_context(struct mbuf *m)
  289 {
  290 
  291         m->m_pkthdr.PH_loc.ptr = NULL;
  292         m->m_pkthdr.flowid = 0;
  293 }
  294 
  295 /*
  296  * Restore context from a queued output chain.
  297  * Return saved ifindex.
  298  *
  299  * VIMAGE: The assertion is there to make sure that we
  300  * actually called CURVNET_SET() with what's in the mbuf chain.
  301  */
  302 static __inline uint32_t
  303 mld_restore_context(struct mbuf *m)
  304 {
  305 
  306 #if defined(VIMAGE) && defined(INVARIANTS)
  307         KASSERT(curvnet == m->m_pkthdr.PH_loc.ptr,
  308             ("%s: called when curvnet was not restored: cuvnet %p m ptr %p",
  309             __func__, curvnet, m->m_pkthdr.PH_loc.ptr));
  310 #endif
  311         return (m->m_pkthdr.flowid);
  312 }
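/*
 * Sketch of how these three helpers are expected to pair up around the
 * deferred-transmission queues used later in this file:
 *
 *      mld_save_context(m, ifp);               enqueue: stash vnet + ifindex
 *      ...
 *      ifindex = mld_restore_context(m);       dispatch: curvnet already set
 *      mld_scrub_context(m);                   before handing the chain on
 *
 * The interface index rides in m_pkthdr.flowid and the vnet pointer in
 * m_pkthdr.PH_loc.ptr, so both borrowed fields are cleared again before
 * the mbuf leaves MLD.
 */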
  313 
  314 /*
  315  * Retrieve or set threshold between group-source queries in seconds.
  316  *
  317  * VIMAGE: Assume curvnet set by caller.
  318  * SMPng: NOTE: Serialized by MLD lock.
  319  */
  320 static int
  321 sysctl_mld_gsr(SYSCTL_HANDLER_ARGS)
  322 {
  323         int error;
  324         int i;
  325 
  326         error = sysctl_wire_old_buffer(req, sizeof(int));
  327         if (error)
  328                 return (error);
  329 
  330         MLD_LOCK();
  331 
  332         i = V_mld_gsrdelay.tv_sec;
  333 
  334         error = sysctl_handle_int(oidp, &i, 0, req);
  335         if (error || !req->newptr)
  336                 goto out_locked;
  337 
  338         if (i < -1 || i >= 60) {
  339                 error = EINVAL;
  340                 goto out_locked;
  341         }
  342 
  343         CTR2(KTR_MLD, "change mld_gsrdelay from %d to %d",
  344              V_mld_gsrdelay.tv_sec, i);
  345         V_mld_gsrdelay.tv_sec = i;
  346 
  347 out_locked:
  348         MLD_UNLOCK();
  349         return (error);
  350 }
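The handler above backs the net.inet6.mld.gsrdelay sysctl declared earlier.
A minimal userland sketch (a separate program, not part of mld6.c, and
needing privilege for the write) that reads and then updates the knob
through sysctlbyname(3); values of 60 seconds or more are rejected with
EINVAL by the range check above:

#include <sys/types.h>
#include <sys/sysctl.h>

#include <stdio.h>

int
main(void)
{
        int delay;
        size_t len = sizeof(delay);

        /* Read the current MLDv2 group-and-source query rate limit. */
        if (sysctlbyname("net.inet6.mld.gsrdelay", &delay, &len,
            NULL, 0) == -1) {
                perror("sysctlbyname");
                return (1);
        }
        printf("gsrdelay: %d seconds\n", delay);

        /* Raise it to 20 seconds; out-of-range values are rejected. */
        delay = 20;
        if (sysctlbyname("net.inet6.mld.gsrdelay", NULL, NULL,
            &delay, sizeof(delay)) == -1) {
                perror("sysctlbyname");
                return (1);
        }
        return (0);
}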
  351 
  352 /*
  353  * Expose struct mld_ifsoftc to userland, keyed by ifindex.
  354  * For use by ifmcstat(8).
  355  *
  356  * VIMAGE: Assume curvnet set by caller. The node handler itself
  357  * is not directly virtualized.
  358  */
  359 static int
  360 sysctl_mld_ifinfo(SYSCTL_HANDLER_ARGS)
  361 {
  362         struct epoch_tracker     et;
  363         int                     *name;
  364         int                      error;
  365         u_int                    namelen;
  366         struct ifnet            *ifp;
  367         struct mld_ifsoftc      *mli;
  368 
  369         name = (int *)arg1;
  370         namelen = arg2;
  371 
  372         if (req->newptr != NULL)
  373                 return (EPERM);
  374 
  375         if (namelen != 1)
  376                 return (EINVAL);
  377 
  378         error = sysctl_wire_old_buffer(req, sizeof(struct mld_ifinfo));
  379         if (error)
  380                 return (error);
  381 
  382         IN6_MULTI_LOCK();
  383         IN6_MULTI_LIST_LOCK();
  384         MLD_LOCK();
  385         NET_EPOCH_ENTER(et);
  386 
  387         error = ENOENT;
  388         ifp = ifnet_byindex(name[0]);
  389         if (ifp == NULL)
  390                 goto out_locked;
  391 
  392         LIST_FOREACH(mli, &V_mli_head, mli_link) {
  393                 if (ifp == mli->mli_ifp) {
  394                         struct mld_ifinfo info;
  395 
  396                         info.mli_version = mli->mli_version;
  397                         info.mli_v1_timer = mli->mli_v1_timer;
  398                         info.mli_v2_timer = mli->mli_v2_timer;
  399                         info.mli_flags = mli->mli_flags;
  400                         info.mli_rv = mli->mli_rv;
  401                         info.mli_qi = mli->mli_qi;
  402                         info.mli_qri = mli->mli_qri;
  403                         info.mli_uri = mli->mli_uri;
  404                         error = SYSCTL_OUT(req, &info, sizeof(info));
  405                         break;
  406                 }
  407         }
  408 
  409 out_locked:
  410         NET_EPOCH_EXIT(et);
  411         MLD_UNLOCK();
  412         IN6_MULTI_LIST_UNLOCK();
  413         IN6_MULTI_UNLOCK();
  414         return (error);
  415 }
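A companion userland sketch of the ifmcstat(8)-style lookup this handler
serves: resolve the net.inet6.mld.ifinfo node, append an interface index
as the final name component, and read back a struct mld_ifinfo.  Again
this is a separate program, not part of mld6.c; it assumes the
userland-visible struct mld_ifinfo from <netinet6/mld6_var.h> and that
interface index 1 exists:

#include <sys/types.h>
#include <sys/sysctl.h>

#include <netinet/in.h>
#include <netinet6/mld6_var.h>

#include <stdio.h>

int
main(void)
{
        struct mld_ifinfo info;
        int mib[CTL_MAXNAME];
        size_t miblen, len;

        /* Resolve the node, then append the interface index as the key. */
        miblen = CTL_MAXNAME - 1;
        if (sysctlnametomib("net.inet6.mld.ifinfo", mib, &miblen) == -1) {
                perror("sysctlnametomib");
                return (1);
        }
        mib[miblen] = 1;                /* ifindex 1, assumed to exist */

        len = sizeof(info);
        if (sysctl(mib, (u_int)(miblen + 1), &info, &len, NULL, 0) == -1) {
                perror("sysctl");
                return (1);
        }
        printf("MLD version %u, robustness %u, query interval %u\n",
            info.mli_version, info.mli_rv, info.mli_qi);
        return (0);
}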
  416 
  417 /*
  418  * Dispatch an entire queue of pending packet chains.
  419  * VIMAGE: Assumes the vnet pointer has been set.
  420  */
  421 static void
  422 mld_dispatch_queue(struct mbufq *mq, int limit)
  423 {
  424         struct mbuf *m;
  425 
  426         while ((m = mbufq_dequeue(mq)) != NULL) {
   427                 CTR3(KTR_MLD, "%s: dispatch %p from %p", __func__, m, mq);
  428                 mld_dispatch_packet(m);
  429                 if (--limit == 0)
  430                         break;
  431         }
  432 }
  433 
  434 /*
  435  * Filter outgoing MLD report state by group.
  436  *
  437  * Reports are ALWAYS suppressed for ALL-HOSTS (ff02::1)
  438  * and node-local addresses. However, kernel and socket consumers
  439  * always embed the KAME scope ID in the address provided, so strip it
  440  * when performing comparison.
  441  * Note: This is not the same as the *multicast* scope.
  442  *
  443  * Return zero if the given group is one for which MLD reports
  444  * should be suppressed, or non-zero if reports should be issued.
  445  */
  446 static __inline int
  447 mld_is_addr_reported(const struct in6_addr *addr)
  448 {
  449 
  450         KASSERT(IN6_IS_ADDR_MULTICAST(addr), ("%s: not multicast", __func__));
  451 
  452         if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_NODELOCAL)
  453                 return (0);
  454 
  455         if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_LINKLOCAL) {
  456                 struct in6_addr tmp = *addr;
  457                 in6_clearscope(&tmp);
  458                 if (IN6_ARE_ADDR_EQUAL(&tmp, &in6addr_linklocal_allnodes))
  459                         return (0);
  460         }
  461 
  462         return (1);
  463 }
  464 
  465 /*
  466  * Attach MLD when PF_INET6 is attached to an interface.  Assumes that the
  467  * current VNET is set by the caller.
  468  */
  469 struct mld_ifsoftc *
  470 mld_domifattach(struct ifnet *ifp)
  471 {
  472         struct mld_ifsoftc *mli;
  473 
  474         CTR3(KTR_MLD, "%s: called for ifp %p(%s)", __func__, ifp, if_name(ifp));
  475 
  476         mli = malloc(sizeof(struct mld_ifsoftc), M_MLD, M_WAITOK | M_ZERO);
  477         mli->mli_ifp = ifp;
  478         mli->mli_version = MLD_VERSION_2;
  479         mli->mli_flags = 0;
  480         mli->mli_rv = MLD_RV_INIT;
  481         mli->mli_qi = MLD_QI_INIT;
  482         mli->mli_qri = MLD_QRI_INIT;
  483         mli->mli_uri = MLD_URI_INIT;
  484         mbufq_init(&mli->mli_gq, MLD_MAX_RESPONSE_PACKETS);
  485         if ((ifp->if_flags & IFF_MULTICAST) == 0)
  486                 mli->mli_flags |= MLIF_SILENT;
  487         if (mld_use_allow)
  488                 mli->mli_flags |= MLIF_USEALLOW;
  489 
  490         MLD_LOCK();
  491         LIST_INSERT_HEAD(&V_mli_head, mli, mli_link);
  492         MLD_UNLOCK();
  493 
  494         return (mli);
  495 }
  496 
  497 /*
  498  * Hook for ifdetach.
  499  *
  500  * NOTE: Some finalization tasks need to run before the protocol domain
  501  * is detached, but also before the link layer does its cleanup.
   502  * Run before link-layer cleanup; clean up groups, but do not free MLD state.
  503  *
  504  * SMPng: Caller must hold IN6_MULTI_LOCK().
  505  * Must take IF_ADDR_LOCK() to cover if_multiaddrs iterator.
  506  * XXX This routine is also bitten by unlocked ifma_protospec access.
  507  */
  508 void
  509 mld_ifdetach(struct ifnet *ifp, struct in6_multi_head *inmh)
  510 {
  511         struct epoch_tracker     et;
  512         struct mld_ifsoftc      *mli;
  513         struct ifmultiaddr      *ifma;
  514         struct in6_multi        *inm;
  515 
  516         CTR3(KTR_MLD, "%s: called for ifp %p(%s)", __func__, ifp,
  517             if_name(ifp));
  518 
  519         IN6_MULTI_LIST_LOCK_ASSERT();
  520         MLD_LOCK();
  521 
  522         mli = MLD_IFINFO(ifp);
  523         IF_ADDR_WLOCK(ifp);
  524         /*
  525          * Extract list of in6_multi associated with the detaching ifp
  526          * which the PF_INET6 layer is about to release.
  527          */
  528         NET_EPOCH_ENTER(et);
  529         CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
  530                 inm = in6m_ifmultiaddr_get_inm(ifma);
  531                 if (inm == NULL)
  532                         continue;
  533                 in6m_disconnect_locked(inmh, inm);
  534 
  535                 if (mli->mli_version == MLD_VERSION_2) {
  536                         in6m_clear_recorded(inm);
  537 
  538                         /*
  539                          * We need to release the final reference held
  540                          * for issuing the INCLUDE {}.
  541                          */
  542                         if (inm->in6m_state == MLD_LEAVING_MEMBER) {
  543                                 inm->in6m_state = MLD_NOT_MEMBER;
  544                                 in6m_rele_locked(inmh, inm);
  545                         }
  546                 }
  547         }
  548         NET_EPOCH_EXIT(et);
  549         IF_ADDR_WUNLOCK(ifp);
  550         MLD_UNLOCK();
  551 }
  552 
  553 /*
  554  * Hook for domifdetach.
  555  * Runs after link-layer cleanup; free MLD state.
  556  *
  557  * SMPng: Normally called with IF_AFDATA_LOCK held.
  558  */
  559 void
  560 mld_domifdetach(struct ifnet *ifp)
  561 {
  562 
  563         CTR3(KTR_MLD, "%s: called for ifp %p(%s)",
  564             __func__, ifp, if_name(ifp));
  565 
  566         MLD_LOCK();
  567         mli_delete_locked(ifp);
  568         MLD_UNLOCK();
  569 }
  570 
  571 static void
  572 mli_delete_locked(struct ifnet *ifp)
  573 {
  574         struct mld_ifsoftc *mli, *tmli;
  575 
  576         CTR3(KTR_MLD, "%s: freeing mld_ifsoftc for ifp %p(%s)",
  577             __func__, ifp, if_name(ifp));
  578 
  579         MLD_LOCK_ASSERT();
  580 
  581         LIST_FOREACH_SAFE(mli, &V_mli_head, mli_link, tmli) {
  582                 if (mli->mli_ifp == ifp) {
  583                         /*
  584                          * Free deferred General Query responses.
  585                          */
  586                         mbufq_drain(&mli->mli_gq);
  587 
  588                         LIST_REMOVE(mli, mli_link);
  589 
  590                         free(mli, M_MLD);
  591                         return;
  592                 }
  593         }
  594 }
  595 
  596 /*
  597  * Process a received MLDv1 general or address-specific query.
  598  * Assumes that the query header has been pulled up to sizeof(mld_hdr).
  599  *
  600  * NOTE: Can't be fully const correct as we temporarily embed scope ID in
  601  * mld_addr. This is OK as we own the mbuf chain.
  602  */
  603 static int
  604 mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
  605     /*const*/ struct mld_hdr *mld)
  606 {
  607         struct ifmultiaddr      *ifma;
  608         struct mld_ifsoftc      *mli;
  609         struct in6_multi        *inm;
  610         int                      is_general_query;
  611         uint16_t                 timer;
  612 #ifdef KTR
  613         char                     ip6tbuf[INET6_ADDRSTRLEN];
  614 #endif
  615 
  616         NET_EPOCH_ASSERT();
  617 
  618         is_general_query = 0;
  619 
  620         if (!mld_v1enable) {
  621                 CTR3(KTR_MLD, "ignore v1 query %s on ifp %p(%s)",
  622                     ip6_sprintf(ip6tbuf, &mld->mld_addr),
  623                     ifp, if_name(ifp));
  624                 return (0);
  625         }
  626 
  627         /*
  628          * RFC3810 Section 6.2: MLD queries must originate from
  629          * a router's link-local address.
  630          */
  631         if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
  632                 CTR3(KTR_MLD, "ignore v1 query src %s on ifp %p(%s)",
  633                     ip6_sprintf(ip6tbuf, &ip6->ip6_src),
  634                     ifp, if_name(ifp));
  635                 return (0);
  636         }
  637 
  638         /*
  639          * Do address field validation upfront before we accept
  640          * the query.
  641          */
  642         if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
  643                 /*
  644                  * MLDv1 General Query.
  645                  * If this was not sent to the all-nodes group, ignore it.
  646                  */
  647                 struct in6_addr          dst;
  648 
  649                 dst = ip6->ip6_dst;
  650                 in6_clearscope(&dst);
  651                 if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes))
  652                         return (EINVAL);
  653                 is_general_query = 1;
  654         } else {
  655                 /*
  656                  * Embed scope ID of receiving interface in MLD query for
  657                  * lookup whilst we don't hold other locks.
  658                  */
  659                 in6_setscope(&mld->mld_addr, ifp, NULL);
  660         }
  661 
  662         IN6_MULTI_LIST_LOCK();
  663         MLD_LOCK();
  664 
  665         /*
  666          * Switch to MLDv1 host compatibility mode.
  667          */
  668         mli = MLD_IFINFO(ifp);
  669         KASSERT(mli != NULL, ("%s: no mld_ifsoftc for ifp %p", __func__, ifp));
  670         mld_set_version(mli, MLD_VERSION_1);
  671 
  672         timer = (ntohs(mld->mld_maxdelay) * MLD_FASTHZ) / MLD_TIMER_SCALE;
  673         if (timer == 0)
  674                 timer = 1;
  675 
  676         if (is_general_query) {
  677                 /*
  678                  * For each reporting group joined on this
  679                  * interface, kick the report timer.
  680                  */
  681                 CTR2(KTR_MLD, "process v1 general query on ifp %p(%s)",
  682                          ifp, if_name(ifp));
  683                 CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
  684                         inm = in6m_ifmultiaddr_get_inm(ifma);
  685                         if (inm == NULL)
  686                                 continue;
  687                         mld_v1_update_group(inm, timer);
  688                 }
  689         } else {
  690                 /*
  691                  * MLDv1 Group-Specific Query.
  692                  * If this is a group-specific MLDv1 query, we need only
  693                  * look up the single group to process it.
  694                  */
  695                 inm = in6m_lookup_locked(ifp, &mld->mld_addr);
  696                 if (inm != NULL) {
  697                         CTR3(KTR_MLD, "process v1 query %s on ifp %p(%s)",
  698                             ip6_sprintf(ip6tbuf, &mld->mld_addr),
  699                             ifp, if_name(ifp));
  700                         mld_v1_update_group(inm, timer);
  701                 }
  702                 /* XXX Clear embedded scope ID as userland won't expect it. */
  703                 in6_clearscope(&mld->mld_addr);
  704         }
  705 
  706         MLD_UNLOCK();
  707         IN6_MULTI_LIST_UNLOCK();
  708 
  709         return (0);
  710 }
  711 
  712 /*
  713  * Update the report timer on a group in response to an MLDv1 query.
  714  *
  715  * If we are becoming the reporting member for this group, start the timer.
  716  * If we already are the reporting member for this group, and timer is
  717  * below the threshold, reset it.
  718  *
  719  * We may be updating the group for the first time since we switched
  720  * to MLDv2. If we are, then we must clear any recorded source lists,
  721  * and transition to REPORTING state; the group timer is overloaded
  722  * for group and group-source query responses. 
  723  *
  724  * Unlike MLDv2, the delay per group should be jittered
  725  * to avoid bursts of MLDv1 reports.
  726  */
  727 static void
  728 mld_v1_update_group(struct in6_multi *inm, const int timer)
  729 {
  730 #ifdef KTR
  731         char                     ip6tbuf[INET6_ADDRSTRLEN];
  732 #endif
  733 
  734         CTR4(KTR_MLD, "%s: %s/%s timer=%d", __func__,
  735             ip6_sprintf(ip6tbuf, &inm->in6m_addr),
  736             if_name(inm->in6m_ifp), timer);
  737 
  738         IN6_MULTI_LIST_LOCK_ASSERT();
  739 
  740         switch (inm->in6m_state) {
  741         case MLD_NOT_MEMBER:
  742         case MLD_SILENT_MEMBER:
  743                 break;
  744         case MLD_REPORTING_MEMBER:
  745                 if (inm->in6m_timer != 0 &&
  746                     inm->in6m_timer <= timer) {
  747                         CTR1(KTR_MLD, "%s: REPORTING and timer running, "
  748                             "skipping.", __func__);
  749                         break;
  750                 }
  751                 /* FALLTHROUGH */
  752         case MLD_SG_QUERY_PENDING_MEMBER:
  753         case MLD_G_QUERY_PENDING_MEMBER:
  754         case MLD_IDLE_MEMBER:
  755         case MLD_LAZY_MEMBER:
  756         case MLD_AWAKENING_MEMBER:
  757                 CTR1(KTR_MLD, "%s: ->REPORTING", __func__);
  758                 inm->in6m_state = MLD_REPORTING_MEMBER;
  759                 inm->in6m_timer = MLD_RANDOM_DELAY(timer);
  760                 V_current_state_timers_running6 = 1;
  761                 break;
  762         case MLD_SLEEPING_MEMBER:
  763                 CTR1(KTR_MLD, "%s: ->AWAKENING", __func__);
  764                 inm->in6m_state = MLD_AWAKENING_MEMBER;
  765                 break;
  766         case MLD_LEAVING_MEMBER:
  767                 break;
  768         }
  769 }
  770 
  771 /*
  772  * Process a received MLDv2 general, group-specific or
  773  * group-and-source-specific query.
  774  *
  775  * Assumes that mld points to a struct mldv2_query which is stored in
  776  * contiguous memory.
  777  *
  778  * Return 0 if successful, otherwise an appropriate error code is returned.
  779  */
  780 static int
  781 mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
  782     struct mbuf *m, struct mldv2_query *mld, const int off, const int icmp6len)
  783 {
  784         struct mld_ifsoftc      *mli;
  785         struct in6_multi        *inm;
  786         uint32_t                 maxdelay, nsrc, qqi;
  787         int                      is_general_query;
  788         uint16_t                 timer;
  789         uint8_t                  qrv;
  790 #ifdef KTR
  791         char                     ip6tbuf[INET6_ADDRSTRLEN];
  792 #endif
  793 
  794         NET_EPOCH_ASSERT();
  795 
  796         if (!mld_v2enable) {
  797                 CTR3(KTR_MLD, "ignore v2 query src %s on ifp %p(%s)",
  798                     ip6_sprintf(ip6tbuf, &ip6->ip6_src),
  799                     ifp, if_name(ifp));
  800                 return (0);
  801         }
  802 
  803         /*
  804          * RFC3810 Section 6.2: MLD queries must originate from
  805          * a router's link-local address.
  806          */
  807         if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
  808                 CTR3(KTR_MLD, "ignore v1 query src %s on ifp %p(%s)",
  809                     ip6_sprintf(ip6tbuf, &ip6->ip6_src),
  810                     ifp, if_name(ifp));
  811                 return (0);
  812         }
  813 
  814         is_general_query = 0;
  815 
  816         CTR2(KTR_MLD, "input v2 query on ifp %p(%s)", ifp, if_name(ifp));
  817 
   818         maxdelay = ntohs(mld->mld_maxdelay);    /* in milliseconds */
  819         if (maxdelay >= 32768) {
  820                 maxdelay = (MLD_MRC_MANT(maxdelay) | 0x1000) <<
  821                            (MLD_MRC_EXP(maxdelay) + 3);
  822         }
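        /*
         * Worked example, assuming the usual RFC 3810 mantissa/exponent
         * split behind MLD_MRC_MANT() and MLD_MRC_EXP(): an on-wire
         * Maximum Response Code of 0x8000 has mant 0x000 and exp 0, so
         * it decodes to (0x000 | 0x1000) << (0 + 3) = 32768 milliseconds.
         * The conversion below then turns milliseconds into MLD_FASTHZ
         * fast-timeout ticks.
         */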
  823         timer = (maxdelay * MLD_FASTHZ) / MLD_TIMER_SCALE;
  824         if (timer == 0)
  825                 timer = 1;
  826 
  827         qrv = MLD_QRV(mld->mld_misc);
  828         if (qrv < 2) {
  829                 CTR3(KTR_MLD, "%s: clamping qrv %d to %d", __func__,
  830                     qrv, MLD_RV_INIT);
  831                 qrv = MLD_RV_INIT;
  832         }
  833 
  834         qqi = mld->mld_qqi;
  835         if (qqi >= 128) {
  836                 qqi = MLD_QQIC_MANT(mld->mld_qqi) <<
  837                      (MLD_QQIC_EXP(mld->mld_qqi) + 3);
  838         }
  839 
  840         nsrc = ntohs(mld->mld_numsrc);
  841         if (nsrc > MLD_MAX_GS_SOURCES)
  842                 return (EMSGSIZE);
  843         if (icmp6len < sizeof(struct mldv2_query) +
  844             (nsrc * sizeof(struct in6_addr)))
  845                 return (EMSGSIZE);
  846 
  847         /*
  848          * Do further input validation upfront to avoid resetting timers
  849          * should we need to discard this query.
  850          */
  851         if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
  852                 /*
  853                  * A general query with a source list has undefined
  854                  * behaviour; discard it.
  855                  */
  856                 if (nsrc > 0)
  857                         return (EINVAL);
  858                 is_general_query = 1;
  859         } else {
  860                 /*
  861                  * Embed scope ID of receiving interface in MLD query for
  862                  * lookup whilst we don't hold other locks (due to KAME
  863                  * locking lameness). We own this mbuf chain just now.
  864                  */
  865                 in6_setscope(&mld->mld_addr, ifp, NULL);
  866         }
  867 
  868         IN6_MULTI_LIST_LOCK();
  869         MLD_LOCK();
  870 
  871         mli = MLD_IFINFO(ifp);
  872         KASSERT(mli != NULL, ("%s: no mld_ifsoftc for ifp %p", __func__, ifp));
  873 
  874         /*
  875          * Discard the v2 query if we're in Compatibility Mode.
  876          * The RFC is pretty clear that hosts need to stay in MLDv1 mode
  877          * until the Old Version Querier Present timer expires.
  878          */
  879         if (mli->mli_version != MLD_VERSION_2)
  880                 goto out_locked;
  881 
  882         mld_set_version(mli, MLD_VERSION_2);
  883         mli->mli_rv = qrv;
  884         mli->mli_qi = qqi;
  885         mli->mli_qri = maxdelay;
  886 
  887         CTR4(KTR_MLD, "%s: qrv %d qi %d maxdelay %d", __func__, qrv, qqi,
  888             maxdelay);
  889 
  890         if (is_general_query) {
  891                 /*
  892                  * MLDv2 General Query.
  893                  *
  894                  * Schedule a current-state report on this ifp for
  895                  * all groups, possibly containing source lists.
  896                  *
  897                  * If there is a pending General Query response
  898                  * scheduled earlier than the selected delay, do
  899                  * not schedule any other reports.
  900                  * Otherwise, reset the interface timer.
  901                  */
  902                 CTR2(KTR_MLD, "process v2 general query on ifp %p(%s)",
  903                     ifp, if_name(ifp));
  904                 if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) {
  905                         mli->mli_v2_timer = MLD_RANDOM_DELAY(timer);
  906                         V_interface_timers_running6 = 1;
  907                 }
  908         } else {
  909                 /*
  910                  * MLDv2 Group-specific or Group-and-source-specific Query.
  911                  *
  912                  * Group-source-specific queries are throttled on
  913                  * a per-group basis to defeat denial-of-service attempts.
  914                  * Queries for groups we are not a member of on this
  915                  * link are simply ignored.
  916                  */
  917                 inm = in6m_lookup_locked(ifp, &mld->mld_addr);
  918                 if (inm == NULL)
  919                         goto out_locked;
  920                 if (nsrc > 0) {
  921                         if (!ratecheck(&inm->in6m_lastgsrtv,
  922                             &V_mld_gsrdelay)) {
  923                                 CTR1(KTR_MLD, "%s: GS query throttled.",
  924                                     __func__);
  925                                 goto out_locked;
  926                         }
  927                 }
  928                 CTR2(KTR_MLD, "process v2 group query on ifp %p(%s)",
  929                      ifp, if_name(ifp));
  930                 /*
  931                  * If there is a pending General Query response
  932                  * scheduled sooner than the selected delay, no
  933                  * further report need be scheduled.
  934                  * Otherwise, prepare to respond to the
  935                  * group-specific or group-and-source query.
  936                  */
  937                 if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer)
  938                         mld_v2_process_group_query(inm, mli, timer, m, mld, off);
  939 
  940                 /* XXX Clear embedded scope ID as userland won't expect it. */
  941                 in6_clearscope(&mld->mld_addr);
  942         }
  943 
  944 out_locked:
  945         MLD_UNLOCK();
  946         IN6_MULTI_LIST_UNLOCK();
  947 
  948         return (0);
  949 }
  950 
  951 /*
  952  * Process a received MLDv2 group-specific or group-and-source-specific
  953  * query.
  954  * Return <0 if any error occurred. Currently this is ignored.
  955  */
  956 static int
  957 mld_v2_process_group_query(struct in6_multi *inm, struct mld_ifsoftc *mli,
  958     int timer, struct mbuf *m0, struct mldv2_query *mld, const int off)
  959 {
  960         int                      retval;
  961         uint16_t                 nsrc;
  962 
  963         IN6_MULTI_LIST_LOCK_ASSERT();
  964         MLD_LOCK_ASSERT();
  965 
  966         retval = 0;
  967 
  968         switch (inm->in6m_state) {
  969         case MLD_NOT_MEMBER:
  970         case MLD_SILENT_MEMBER:
  971         case MLD_SLEEPING_MEMBER:
  972         case MLD_LAZY_MEMBER:
  973         case MLD_AWAKENING_MEMBER:
  974         case MLD_IDLE_MEMBER:
  975         case MLD_LEAVING_MEMBER:
  976                 return (retval);
  977                 break;
  978         case MLD_REPORTING_MEMBER:
  979         case MLD_G_QUERY_PENDING_MEMBER:
  980         case MLD_SG_QUERY_PENDING_MEMBER:
  981                 break;
  982         }
  983 
  984         nsrc = ntohs(mld->mld_numsrc);
  985 
  986         /* Length should be checked by calling function. */
  987         KASSERT((m0->m_flags & M_PKTHDR) == 0 ||
  988             m0->m_pkthdr.len >= off + sizeof(struct mldv2_query) +
  989             nsrc * sizeof(struct in6_addr),
  990             ("mldv2 packet is too short: (%d bytes < %zd bytes, m=%p)",
  991             m0->m_pkthdr.len, off + sizeof(struct mldv2_query) +
  992             nsrc * sizeof(struct in6_addr), m0));
  993 
  994         /*
  995          * Deal with group-specific queries upfront.
  996          * If any group query is already pending, purge any recorded
  997          * source-list state if it exists, and schedule a query response
  998          * for this group-specific query.
  999          */
 1000         if (nsrc == 0) {
 1001                 if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER ||
 1002                     inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER) {
 1003                         in6m_clear_recorded(inm);
 1004                         timer = min(inm->in6m_timer, timer);
 1005                 }
 1006                 inm->in6m_state = MLD_G_QUERY_PENDING_MEMBER;
 1007                 inm->in6m_timer = MLD_RANDOM_DELAY(timer);
 1008                 V_current_state_timers_running6 = 1;
 1009                 return (retval);
 1010         }
 1011 
 1012         /*
 1013          * Deal with the case where a group-and-source-specific query has
 1014          * been received but a group-specific query is already pending.
 1015          */
 1016         if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER) {
 1017                 timer = min(inm->in6m_timer, timer);
 1018                 inm->in6m_timer = MLD_RANDOM_DELAY(timer);
 1019                 V_current_state_timers_running6 = 1;
 1020                 return (retval);
 1021         }
 1022 
 1023         /*
 1024          * Finally, deal with the case where a group-and-source-specific
 1025          * query has been received, where a response to a previous g-s-r
 1026          * query exists, or none exists.
 1027          * In this case, we need to parse the source-list which the Querier
 1028          * has provided us with and check if we have any source list filter
 1029          * entries at T1 for these sources. If we do not, there is no need
  1030          * to schedule a report and the query may be dropped.
 1031          * If we do, we must record them and schedule a current-state
 1032          * report for those sources.
 1033          */
 1034         if (inm->in6m_nsrc > 0) {
 1035                 struct in6_addr          srcaddr;
 1036                 int                      i, nrecorded;
 1037                 int                      soff;
 1038 
 1039                 soff = off + sizeof(struct mldv2_query);
 1040                 nrecorded = 0;
 1041                 for (i = 0; i < nsrc; i++) {
 1042                         m_copydata(m0, soff, sizeof(struct in6_addr),
 1043                             (caddr_t)&srcaddr);
 1044                         retval = in6m_record_source(inm, &srcaddr);
 1045                         if (retval < 0)
 1046                                 break;
 1047                         nrecorded += retval;
 1048                         soff += sizeof(struct in6_addr);
 1049                 }
 1050                 if (nrecorded > 0) {
 1051                         CTR1(KTR_MLD,
 1052                             "%s: schedule response to SG query", __func__);
 1053                         inm->in6m_state = MLD_SG_QUERY_PENDING_MEMBER;
 1054                         inm->in6m_timer = MLD_RANDOM_DELAY(timer);
 1055                         V_current_state_timers_running6 = 1;
 1056                 }
 1057         }
 1058 
 1059         return (retval);
 1060 }
 1061 
 1062 /*
 1063  * Process a received MLDv1 host membership report.
 1064  * Assumes mld points to mld_hdr in pulled up mbuf chain.
 1065  *
 1066  * NOTE: Can't be fully const correct as we temporarily embed scope ID in
 1067  * mld_addr. This is OK as we own the mbuf chain.
 1068  */
 1069 static int
 1070 mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6,
 1071     /*const*/ struct mld_hdr *mld)
 1072 {
 1073         struct in6_addr          src, dst;
 1074         struct in6_ifaddr       *ia;
 1075         struct in6_multi        *inm;
 1076 #ifdef KTR
 1077         char                     ip6tbuf[INET6_ADDRSTRLEN];
 1078 #endif
 1079 
 1080         NET_EPOCH_ASSERT();
 1081 
 1082         if (!mld_v1enable) {
 1083                 CTR3(KTR_MLD, "ignore v1 report %s on ifp %p(%s)",
 1084                     ip6_sprintf(ip6tbuf, &mld->mld_addr),
 1085                     ifp, if_name(ifp));
 1086                 return (0);
 1087         }
 1088 
 1089         if (ifp->if_flags & IFF_LOOPBACK)
 1090                 return (0);
 1091 
 1092         /*
 1093          * MLDv1 reports must originate from a host's link-local address,
 1094          * or the unspecified address (when booting).
 1095          */
 1096         src = ip6->ip6_src;
 1097         in6_clearscope(&src);
 1098         if (!IN6_IS_SCOPE_LINKLOCAL(&src) && !IN6_IS_ADDR_UNSPECIFIED(&src)) {
 1099                 CTR3(KTR_MLD, "ignore v1 query src %s on ifp %p(%s)",
 1100                     ip6_sprintf(ip6tbuf, &ip6->ip6_src),
 1101                     ifp, if_name(ifp));
 1102                 return (EINVAL);
 1103         }
 1104 
 1105         /*
 1106          * RFC2710 Section 4: MLDv1 reports must pertain to a multicast
 1107          * group, and must be directed to the group itself.
 1108          */
 1109         dst = ip6->ip6_dst;
 1110         in6_clearscope(&dst);
 1111         if (!IN6_IS_ADDR_MULTICAST(&mld->mld_addr) ||
 1112             !IN6_ARE_ADDR_EQUAL(&mld->mld_addr, &dst)) {
 1113                 CTR3(KTR_MLD, "ignore v1 query dst %s on ifp %p(%s)",
 1114                     ip6_sprintf(ip6tbuf, &ip6->ip6_dst),
 1115                     ifp, if_name(ifp));
 1116                 return (EINVAL);
 1117         }
 1118 
 1119         /*
 1120          * Make sure we don't hear our own membership report, as fast
 1121          * leave requires knowing that we are the only member of a
 1122          * group. Assume we used the link-local address if available,
 1123          * otherwise look for ::.
 1124          *
 1125          * XXX Note that scope ID comparison is needed for the address
 1126          * returned by in6ifa_ifpforlinklocal(), but SHOULD NOT be
 1127          * performed for the on-wire address.
 1128          */
 1129         ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
 1130         if ((ia && IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, IA6_IN6(ia))) ||
 1131             (ia == NULL && IN6_IS_ADDR_UNSPECIFIED(&src))) {
 1132                 if (ia != NULL)
 1133                         ifa_free(&ia->ia_ifa);
 1134                 return (0);
 1135         }
 1136         if (ia != NULL)
 1137                 ifa_free(&ia->ia_ifa);
 1138 
 1139         CTR3(KTR_MLD, "process v1 report %s on ifp %p(%s)",
 1140             ip6_sprintf(ip6tbuf, &mld->mld_addr), ifp, if_name(ifp));
 1141 
 1142         /*
 1143          * Embed scope ID of receiving interface in MLD query for lookup
 1144          * whilst we don't hold other locks (due to KAME locking lameness).
 1145          */
 1146         if (!IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr))
 1147                 in6_setscope(&mld->mld_addr, ifp, NULL);
 1148 
 1149         IN6_MULTI_LIST_LOCK();
 1150         MLD_LOCK();
 1151 
 1152         /*
 1153          * MLDv1 report suppression.
 1154          * If we are a member of this group, and our membership should be
 1155          * reported, and our group timer is pending or about to be reset,
 1156          * stop our group timer by transitioning to the 'lazy' state.
 1157          */
 1158         inm = in6m_lookup_locked(ifp, &mld->mld_addr);
 1159         if (inm != NULL) {
 1160                 struct mld_ifsoftc *mli;
 1161 
 1162                 mli = inm->in6m_mli;
 1163                 KASSERT(mli != NULL,
 1164                     ("%s: no mli for ifp %p", __func__, ifp));
 1165 
 1166                 /*
 1167                  * If we are in MLDv2 host mode, do not allow the
 1168                  * other host's MLDv1 report to suppress our reports.
 1169                  */
 1170                 if (mli->mli_version == MLD_VERSION_2)
 1171                         goto out_locked;
 1172 
 1173                 inm->in6m_timer = 0;
 1174 
 1175                 switch (inm->in6m_state) {
 1176                 case MLD_NOT_MEMBER:
 1177                 case MLD_SILENT_MEMBER:
 1178                 case MLD_SLEEPING_MEMBER:
 1179                         break;
 1180                 case MLD_REPORTING_MEMBER:
 1181                 case MLD_IDLE_MEMBER:
 1182                 case MLD_AWAKENING_MEMBER:
 1183                         CTR3(KTR_MLD,
 1184                             "report suppressed for %s on ifp %p(%s)",
 1185                             ip6_sprintf(ip6tbuf, &mld->mld_addr),
 1186                             ifp, if_name(ifp));
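                        /* FALLTHROUGH */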
 1187                 case MLD_LAZY_MEMBER:
 1188                         inm->in6m_state = MLD_LAZY_MEMBER;
 1189                         break;
 1190                 case MLD_G_QUERY_PENDING_MEMBER:
 1191                 case MLD_SG_QUERY_PENDING_MEMBER:
 1192                 case MLD_LEAVING_MEMBER:
 1193                         break;
 1194                 }
 1195         }
 1196 
 1197 out_locked:
 1198         MLD_UNLOCK();
 1199         IN6_MULTI_LIST_UNLOCK();
 1200 
 1201         /* XXX Clear embedded scope ID as userland won't expect it. */
 1202         in6_clearscope(&mld->mld_addr);
 1203 
 1204         return (0);
 1205 }
 1206 
 1207 /*
 1208  * MLD input path.
 1209  *
 1210  * Assume query messages which fit in a single ICMPv6 message header
 1211  * have been pulled up.
 1212  * Assume that userland will want to see the message, even if it
 1213  * otherwise fails kernel input validation; do not free it.
 1214  * Pullup may however free the mbuf chain m if it fails.
 1215  *
 1216  * Return IPPROTO_DONE if we freed m. Otherwise, return 0.
 1217  */
 1218 int
 1219 mld_input(struct mbuf **mp, int off, int icmp6len)
 1220 {
 1221         struct ifnet    *ifp;
 1222         struct ip6_hdr  *ip6;
 1223         struct mbuf     *m;
 1224         struct mld_hdr  *mld;
 1225         int              mldlen;
 1226 
 1227         m = *mp;
 1228         CTR3(KTR_MLD, "%s: called w/mbuf (%p,%d)", __func__, m, off);
 1229 
 1230         ifp = m->m_pkthdr.rcvif;
 1231 
 1232         /* Pullup to appropriate size. */
 1233         if (m->m_len < off + sizeof(*mld)) {
 1234                 m = m_pullup(m, off + sizeof(*mld));
 1235                 if (m == NULL) {
 1236                         ICMP6STAT_INC(icp6s_badlen);
 1237                         return (IPPROTO_DONE);
 1238                 }
 1239         }
 1240         mld = (struct mld_hdr *)(mtod(m, uint8_t *) + off);
 1241         if (mld->mld_type == MLD_LISTENER_QUERY &&
 1242             icmp6len >= sizeof(struct mldv2_query)) {
 1243                 mldlen = sizeof(struct mldv2_query);
 1244         } else {
 1245                 mldlen = sizeof(struct mld_hdr);
 1246         }
 1247         if (m->m_len < off + mldlen) {
 1248                 m = m_pullup(m, off + mldlen);
 1249                 if (m == NULL) {
 1250                         ICMP6STAT_INC(icp6s_badlen);
 1251                         return (IPPROTO_DONE);
 1252                 }
 1253         }
 1254         *mp = m;
 1255         ip6 = mtod(m, struct ip6_hdr *);
 1256         mld = (struct mld_hdr *)(mtod(m, uint8_t *) + off);
 1257 
 1258         /*
 1259          * Userland needs to see all of this traffic for implementing
 1260          * the endpoint discovery portion of multicast routing.
 1261          */
 1262         switch (mld->mld_type) {
 1263         case MLD_LISTENER_QUERY:
 1264                 icmp6_ifstat_inc(ifp, ifs6_in_mldquery);
 1265                 if (icmp6len == sizeof(struct mld_hdr)) {
 1266                         if (mld_v1_input_query(ifp, ip6, mld) != 0)
 1267                                 return (0);
 1268                 } else if (icmp6len >= sizeof(struct mldv2_query)) {
 1269                         if (mld_v2_input_query(ifp, ip6, m,
 1270                             (struct mldv2_query *)mld, off, icmp6len) != 0)
 1271                                 return (0);
 1272                 }
 1273                 break;
 1274         case MLD_LISTENER_REPORT:
 1275                 icmp6_ifstat_inc(ifp, ifs6_in_mldreport);
 1276                 if (mld_v1_input_report(ifp, ip6, mld) != 0)
 1277                         return (0);
 1278                 break;
 1279         case MLDV2_LISTENER_REPORT:
 1280                 icmp6_ifstat_inc(ifp, ifs6_in_mldreport);
 1281                 break;
 1282         case MLD_LISTENER_DONE:
 1283                 icmp6_ifstat_inc(ifp, ifs6_in_mlddone);
 1284                 break;
 1285         default:
 1286                 break;
 1287         }
 1288 
 1289         return (0);
 1290 }
 1291 
 1292 /*
 1293  * Fast timeout handler (global).
 1294  * VIMAGE: Timeout handlers are expected to service all vimages.
 1295  */
 1296 static struct callout mldfast_callout;
 1297 static void
 1298 mld_fasttimo(void *arg __unused)
 1299 {
 1300         struct epoch_tracker et;
 1301         struct in6_multi_head inmh;
 1302         VNET_ITERATOR_DECL(vnet_iter);
 1303 
 1304         SLIST_INIT(&inmh);
 1305 
 1306         NET_EPOCH_ENTER(et);
 1307         VNET_LIST_RLOCK_NOSLEEP();
 1308         VNET_FOREACH(vnet_iter) {
 1309                 CURVNET_SET(vnet_iter);
 1310                 mld_fasttimo_vnet(&inmh);
 1311                 CURVNET_RESTORE();
 1312         }
 1313         VNET_LIST_RUNLOCK_NOSLEEP();
 1314         NET_EPOCH_EXIT(et);
 1315         in6m_release_list_deferred(&inmh);
 1316 
 1317         callout_reset(&mldfast_callout, hz / MLD_FASTHZ, mld_fasttimo, NULL);
 1318 }
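/*
 * Scheduling note: mld_fasttimo() re-arms itself with
 * callout_reset(&mldfast_callout, hz / MLD_FASTHZ, ...), so with the
 * customary MLD_FASTHZ value of 5 the per-vnet pass below runs five
 * times a second, and every "timer" field maintained in this file
 * counts in those fast-timeout ticks.
 */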
 1319 
 1320 /*
 1321  * Fast timeout handler (per-vnet).
 1322  *
 1323  * VIMAGE: Assume caller has set up our curvnet.
 1324  */
 1325 static void
 1326 mld_fasttimo_vnet(struct in6_multi_head *inmh)
 1327 {
 1328         struct mbufq             scq;   /* State-change packets */
 1329         struct mbufq             qrq;   /* Query response packets */
 1330         struct ifnet            *ifp;
 1331         struct mld_ifsoftc      *mli;
 1332         struct ifmultiaddr      *ifma;
 1333         struct in6_multi        *inm;
 1334         int                      uri_fasthz;
 1335 
 1336         uri_fasthz = 0;
 1337 
 1338         /*
 1339          * Quick check to see if any work needs to be done, in order to
 1340          * minimize the overhead of fasttimo processing.
 1341          * SMPng: XXX Unlocked reads.
 1342          */
 1343         if (!V_current_state_timers_running6 &&
 1344             !V_interface_timers_running6 &&
 1345             !V_state_change_timers_running6)
 1346                 return;
 1347 
 1348         IN6_MULTI_LIST_LOCK();
 1349         MLD_LOCK();
 1350 
 1351         /*
 1352          * MLDv2 General Query response timer processing.
 1353          */
 1354         if (V_interface_timers_running6) {
 1355                 CTR1(KTR_MLD, "%s: interface timers running", __func__);
 1356 
 1357                 V_interface_timers_running6 = 0;
 1358                 LIST_FOREACH(mli, &V_mli_head, mli_link) {
 1359                         if (mli->mli_v2_timer == 0) {
 1360                                 /* Do nothing. */
 1361                         } else if (--mli->mli_v2_timer == 0) {
 1362                                 mld_v2_dispatch_general_query(mli);
 1363                         } else {
 1364                                 V_interface_timers_running6 = 1;
 1365                         }
 1366                 }
 1367         }
 1368 
 1369         if (!V_current_state_timers_running6 &&
 1370             !V_state_change_timers_running6)
 1371                 goto out_locked;
 1372 
 1373         V_current_state_timers_running6 = 0;
 1374         V_state_change_timers_running6 = 0;
 1375 
 1376         CTR1(KTR_MLD, "%s: state change timers running", __func__);
 1377 
 1378         /*
 1379          * MLD host report and state-change timer processing.
 1380          * Note: Processing a v2 group timer may remove a node.
 1381          */
 1382         LIST_FOREACH(mli, &V_mli_head, mli_link) {
 1383                 ifp = mli->mli_ifp;
 1384 
 1385                 if (mli->mli_version == MLD_VERSION_2) {
 1386                         uri_fasthz = MLD_RANDOM_DELAY(mli->mli_uri *
 1387                             MLD_FASTHZ);
 1388                         mbufq_init(&qrq, MLD_MAX_G_GS_PACKETS);
 1389                         mbufq_init(&scq, MLD_MAX_STATE_CHANGE_PACKETS);
 1390                 }
 1391 
 1392                 IF_ADDR_WLOCK(ifp);
 1393                 CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 1394                         inm = in6m_ifmultiaddr_get_inm(ifma);
 1395                         if (inm == NULL)
 1396                                 continue;
 1397                         switch (mli->mli_version) {
 1398                         case MLD_VERSION_1:
 1399                                 mld_v1_process_group_timer(inmh, inm);
 1400                                 break;
 1401                         case MLD_VERSION_2:
 1402                                 mld_v2_process_group_timers(inmh, &qrq,
 1403                                     &scq, inm, uri_fasthz);
 1404                                 break;
 1405                         }
 1406                 }
 1407                 IF_ADDR_WUNLOCK(ifp);
 1408 
 1409                 switch (mli->mli_version) {
 1410                 case MLD_VERSION_1:
 1411                         /*
 1412                          * Transmit reports for this lifecycle.  This
 1413                          * is done without holding IF_ADDR_LOCK, since
 1414                          * mld_v1_transmit_report() calls
 1415                          * in6ifa_ifpforlinklocal(), which takes
 1416                          * IF_ADDR_LOCK internally, as well as
 1417                          * ip6_output() to transmit a packet.
 1418                          */
 1419                         while ((inm = SLIST_FIRST(inmh)) != NULL) {
 1420                                 SLIST_REMOVE_HEAD(inmh, in6m_defer);
 1421                                 (void)mld_v1_transmit_report(inm,
 1422                                     MLD_LISTENER_REPORT);
 1423                         }
 1424                         break;
 1425                 case MLD_VERSION_2:
 1426                         mld_dispatch_queue(&qrq, 0);
 1427                         mld_dispatch_queue(&scq, 0);
 1428                         break;
 1429                 }
 1430         }
 1431 
 1432 out_locked:
 1433         MLD_UNLOCK();
 1434         IN6_MULTI_LIST_UNLOCK();
 1435 }
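
/*
 * Note on uri_fasthz above: the link's Unsolicited Report Interval
 * (mli_uri, in seconds) is converted to fast-timer ticks and jittered
 * with MLD_RANDOM_DELAY() so that retransmissions from multiple hosts
 * do not burst at the same instant.  As an illustration, a URI of
 * 10 seconds at 5 fast ticks per second gives a retransmission delay
 * drawn from a window of up to 50 ticks (about 10 s).
 */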
 1436 
 1437 /*
 1438  * Update host report group timer.
 1439  * Will update the global pending timer flags.
 1440  */
 1441 static void
 1442 mld_v1_process_group_timer(struct in6_multi_head *inmh, struct in6_multi *inm)
 1443 {
 1444         int report_timer_expired;
 1445 
 1446         IN6_MULTI_LIST_LOCK_ASSERT();
 1447         MLD_LOCK_ASSERT();
 1448 
 1449         if (inm->in6m_timer == 0) {
 1450                 report_timer_expired = 0;
 1451         } else if (--inm->in6m_timer == 0) {
 1452                 report_timer_expired = 1;
 1453         } else {
 1454                 V_current_state_timers_running6 = 1;
 1455                 return;
 1456         }
 1457 
 1458         switch (inm->in6m_state) {
 1459         case MLD_NOT_MEMBER:
 1460         case MLD_SILENT_MEMBER:
 1461         case MLD_IDLE_MEMBER:
 1462         case MLD_LAZY_MEMBER:
 1463         case MLD_SLEEPING_MEMBER:
 1464         case MLD_AWAKENING_MEMBER:
 1465                 break;
 1466         case MLD_REPORTING_MEMBER:
 1467                 if (report_timer_expired) {
 1468                         inm->in6m_state = MLD_IDLE_MEMBER;
 1469                         SLIST_INSERT_HEAD(inmh, inm, in6m_defer);
 1470                 }
 1471                 break;
 1472         case MLD_G_QUERY_PENDING_MEMBER:
 1473         case MLD_SG_QUERY_PENDING_MEMBER:
 1474         case MLD_LEAVING_MEMBER:
 1475                 break;
 1476         }
 1477 }
 1478 
 1479 /*
 1480  * Update a group's timers for MLDv2.
 1481  * Will update the global pending timer flags.
 1482  * Note: Unlocked read from mli.
 1483  */
 1484 static void
 1485 mld_v2_process_group_timers(struct in6_multi_head *inmh,
 1486     struct mbufq *qrq, struct mbufq *scq,
 1487     struct in6_multi *inm, const int uri_fasthz)
 1488 {
 1489         int query_response_timer_expired;
 1490         int state_change_retransmit_timer_expired;
 1491 #ifdef KTR
 1492         char ip6tbuf[INET6_ADDRSTRLEN];
 1493 #endif
 1494 
 1495         IN6_MULTI_LIST_LOCK_ASSERT();
 1496         MLD_LOCK_ASSERT();
 1497 
 1498         query_response_timer_expired = 0;
 1499         state_change_retransmit_timer_expired = 0;
 1500 
 1501         /*
 1502          * During a transition from compatibility mode back to MLDv2,
 1503          * a group record in REPORTING state may still have its group
 1504          * timer active. This is a no-op in this function; it is easier
 1505          * to deal with it here than to complicate the slow-timeout path.
 1506          */
 1507         if (inm->in6m_timer == 0) {
 1508                 query_response_timer_expired = 0;
 1509         } else if (--inm->in6m_timer == 0) {
 1510                 query_response_timer_expired = 1;
 1511         } else {
 1512                 V_current_state_timers_running6 = 1;
 1513         }
 1514 
 1515         if (inm->in6m_sctimer == 0) {
 1516                 state_change_retransmit_timer_expired = 0;
 1517         } else if (--inm->in6m_sctimer == 0) {
 1518                 state_change_retransmit_timer_expired = 1;
 1519         } else {
 1520                 V_state_change_timers_running6 = 1;
 1521         }
 1522 
 1523         /* We are in fasttimo, so be quick about it. */
 1524         if (!state_change_retransmit_timer_expired &&
 1525             !query_response_timer_expired)
 1526                 return;
 1527 
 1528         switch (inm->in6m_state) {
 1529         case MLD_NOT_MEMBER:
 1530         case MLD_SILENT_MEMBER:
 1531         case MLD_SLEEPING_MEMBER:
 1532         case MLD_LAZY_MEMBER:
 1533         case MLD_AWAKENING_MEMBER:
 1534         case MLD_IDLE_MEMBER:
 1535                 break;
 1536         case MLD_G_QUERY_PENDING_MEMBER:
 1537         case MLD_SG_QUERY_PENDING_MEMBER:
 1538                 /*
 1539                  * Respond to a previously pending Group-Specific
 1540                  * or Group-and-Source-Specific query by enqueueing
 1541                  * the appropriate Current-State report for
 1542                  * immediate transmission.
 1543                  */
 1544                 if (query_response_timer_expired) {
 1545                         int retval __unused;
 1546 
 1547                         retval = mld_v2_enqueue_group_record(qrq, inm, 0, 1,
 1548                             (inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER),
 1549                             0);
 1550                         CTR2(KTR_MLD, "%s: enqueue record = %d",
 1551                             __func__, retval);
 1552                         inm->in6m_state = MLD_REPORTING_MEMBER;
 1553                         in6m_clear_recorded(inm);
 1554                 }
 1555                 /* FALLTHROUGH */
 1556         case MLD_REPORTING_MEMBER:
 1557         case MLD_LEAVING_MEMBER:
 1558                 if (state_change_retransmit_timer_expired) {
 1559                         /*
 1560                          * State-change retransmission timer fired.
 1561                          * If there are any further pending retransmissions,
 1562                          * set the global pending state-change flag, and
 1563                          * reset the timer.
 1564                          */
 1565                         if (--inm->in6m_scrv > 0) {
 1566                                 inm->in6m_sctimer = uri_fasthz;
 1567                                 V_state_change_timers_running6 = 1;
 1568                         }
 1569                         /*
 1570                          * Retransmit the previously computed state-change
 1571                          * report. If there are no further pending
 1572                          * retransmissions, the mbuf queue will be consumed.
 1573                          * Update T0 state to T1 as we have now sent
 1574                          * a state-change.
 1575                          */
 1576                         (void)mld_v2_merge_state_changes(inm, scq);
 1577 
 1578                         in6m_commit(inm);
 1579                         CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__,
 1580                             ip6_sprintf(ip6tbuf, &inm->in6m_addr),
 1581                             if_name(inm->in6m_ifp));
 1582 
 1583                         /*
 1584                          * If we are leaving the group for good, make sure
 1585                          * we release MLD's reference to it.
 1586                          * This release must be deferred using a SLIST,
 1587                          * as we are called from a loop which traverses
 1588                          * the interface's if_multiaddrs list.
 1589                          */
 1590                         if (inm->in6m_state == MLD_LEAVING_MEMBER &&
 1591                             inm->in6m_scrv == 0) {
 1592                                 inm->in6m_state = MLD_NOT_MEMBER;
 1593                                 in6m_disconnect_locked(inmh, inm);
 1594                                 in6m_rele_locked(inmh, inm);
 1595                         }
 1596                 }
 1597                 break;
 1598         }
 1599 }
 1600 
 1601 /*
 1602  * Switch to a different version on the given interface,
 1603  * as per RFC 3810, Section 9.12.
 1604  */
 1605 static void
 1606 mld_set_version(struct mld_ifsoftc *mli, const int version)
 1607 {
 1608         int old_version_timer;
 1609 
 1610         MLD_LOCK_ASSERT();
 1611 
 1612         CTR4(KTR_MLD, "%s: switching to v%d on ifp %p(%s)", __func__,
 1613             version, mli->mli_ifp, if_name(mli->mli_ifp));
 1614 
 1615         if (version == MLD_VERSION_1) {
 1616                 /*
 1617                  * Compute the "Older Version Querier Present" timer as per
 1618                  * RFC 3810, Section 9.12.
 1619                  */
 1620                 old_version_timer = (mli->mli_rv * mli->mli_qi) + mli->mli_qri;
 1621                 old_version_timer *= MLD_SLOWHZ;
 1622                 mli->mli_v1_timer = old_version_timer;
 1623         }
 1624 
 1625         if (mli->mli_v1_timer > 0 && mli->mli_version != MLD_VERSION_1) {
 1626                 mli->mli_version = MLD_VERSION_1;
 1627                 mld_v2_cancel_link_timers(mli);
 1628         }
 1629 }
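
/*
 * Worked example (using the RFC 3810 defaults): with a Robustness
 * Variable of 2, a Query Interval of 125 s and a Query Response
 * Interval of 10 s, the Older Version Querier Present timeout is
 * (2 * 125) + 10 = 260 seconds.  At MLD_SLOWHZ (conventionally 2)
 * slow ticks per second this becomes 520 ticks of mli_v1_timer,
 * after which mld_v1_process_querier_timers() reverts the link to
 * MLDv2.
 */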
 1630 
 1631 /*
 1632  * Cancel pending MLDv2 timers for the given link and all groups
 1633  * joined on it; state-change, general-query, and group-query timers.
 1634  */
 1635 static void
 1636 mld_v2_cancel_link_timers(struct mld_ifsoftc *mli)
 1637 {
 1638         struct epoch_tracker     et;
 1639         struct in6_multi_head    inmh;
 1640         struct ifmultiaddr      *ifma;
 1641         struct ifnet            *ifp;
 1642         struct in6_multi        *inm;
 1643 
 1644         CTR3(KTR_MLD, "%s: cancel v2 timers on ifp %p(%s)", __func__,
 1645             mli->mli_ifp, if_name(mli->mli_ifp));
 1646 
 1647         SLIST_INIT(&inmh);
 1648         IN6_MULTI_LIST_LOCK_ASSERT();
 1649         MLD_LOCK_ASSERT();
 1650 
 1651         /*
 1652          * Fast-track this potentially expensive operation
 1653          * by checking all the global 'timer pending' flags.
 1654          */
 1655         if (!V_interface_timers_running6 &&
 1656             !V_state_change_timers_running6 &&
 1657             !V_current_state_timers_running6)
 1658                 return;
 1659 
 1660         mli->mli_v2_timer = 0;
 1661 
 1662         ifp = mli->mli_ifp;
 1663 
 1664         IF_ADDR_WLOCK(ifp);
 1665         NET_EPOCH_ENTER(et);
 1666         CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 1667                 inm = in6m_ifmultiaddr_get_inm(ifma);
 1668                 if (inm == NULL)
 1669                         continue;
 1670                 switch (inm->in6m_state) {
 1671                 case MLD_NOT_MEMBER:
 1672                 case MLD_SILENT_MEMBER:
 1673                 case MLD_IDLE_MEMBER:
 1674                 case MLD_LAZY_MEMBER:
 1675                 case MLD_SLEEPING_MEMBER:
 1676                 case MLD_AWAKENING_MEMBER:
 1677                         break;
 1678                 case MLD_LEAVING_MEMBER:
 1679                         /*
 1680                          * If we are leaving the group and switching
 1681                          * version, we need to release the final
 1682                          * reference held for issuing the INCLUDE {}.
 1683                          */
 1684                         if (inm->in6m_refcount == 1)
 1685                                 in6m_disconnect_locked(&inmh, inm);
 1686                         in6m_rele_locked(&inmh, inm);
 1687                         /* FALLTHROUGH */
 1688                 case MLD_G_QUERY_PENDING_MEMBER:
 1689                 case MLD_SG_QUERY_PENDING_MEMBER:
 1690                         in6m_clear_recorded(inm);
 1691                         /* FALLTHROUGH */
 1692                 case MLD_REPORTING_MEMBER:
 1693                         inm->in6m_sctimer = 0;
 1694                         inm->in6m_timer = 0;
 1695                         inm->in6m_state = MLD_REPORTING_MEMBER;
 1696                         /*
 1697                          * Free any pending MLDv2 state-change records.
 1698                          */
 1699                         mbufq_drain(&inm->in6m_scq);
 1700                         break;
 1701                 }
 1702         }
 1703         NET_EPOCH_EXIT(et);
 1704         IF_ADDR_WUNLOCK(ifp);
 1705         in6m_release_list_deferred(&inmh);
 1706 }
 1707 
 1708 /*
 1709  * Global slowtimo handler.
 1710  * VIMAGE: Timeout handlers are expected to service all vimages.
 1711  */
 1712 static struct callout mldslow_callout;
 1713 static void
 1714 mld_slowtimo(void *arg __unused)
 1715 {
 1716         VNET_ITERATOR_DECL(vnet_iter);
 1717 
 1718         VNET_LIST_RLOCK_NOSLEEP();
 1719         VNET_FOREACH(vnet_iter) {
 1720                 CURVNET_SET(vnet_iter);
 1721                 mld_slowtimo_vnet();
 1722                 CURVNET_RESTORE();
 1723         }
 1724         VNET_LIST_RUNLOCK_NOSLEEP();
 1725 
 1726         callout_reset(&mldslow_callout, hz / MLD_SLOWHZ, mld_slowtimo, NULL);
 1727 }
 1728 
 1729 /*
 1730  * Per-vnet slowtimo handler.
 1731  */
 1732 static void
 1733 mld_slowtimo_vnet(void)
 1734 {
 1735         struct mld_ifsoftc *mli;
 1736 
 1737         MLD_LOCK();
 1738 
 1739         LIST_FOREACH(mli, &V_mli_head, mli_link) {
 1740                 mld_v1_process_querier_timers(mli);
 1741         }
 1742 
 1743         MLD_UNLOCK();
 1744 }
 1745 
 1746 /*
 1747  * Update the Older Version Querier Present timers for a link.
 1748  * See Section 9.12 of RFC 3810.
 1749  */
 1750 static void
 1751 mld_v1_process_querier_timers(struct mld_ifsoftc *mli)
 1752 {
 1753 
 1754         MLD_LOCK_ASSERT();
 1755 
 1756         if (mli->mli_version != MLD_VERSION_2 && --mli->mli_v1_timer == 0) {
 1757                 /*
 1758                  * MLDv1 Querier Present timer expired; revert to MLDv2.
 1759                  */
 1760                 CTR5(KTR_MLD,
 1761                     "%s: transition from v%d -> v%d on %p(%s)",
 1762                     __func__, mli->mli_version, MLD_VERSION_2,
 1763                     mli->mli_ifp, if_name(mli->mli_ifp));
 1764                 mli->mli_version = MLD_VERSION_2;
 1765         }
 1766 }
 1767 
 1768 /*
 1769  * Transmit an MLDv1 report immediately.
 1770  */
 1771 static int
 1772 mld_v1_transmit_report(struct in6_multi *in6m, const int type)
 1773 {
 1774         struct ifnet            *ifp;
 1775         struct in6_ifaddr       *ia;
 1776         struct ip6_hdr          *ip6;
 1777         struct mbuf             *mh, *md;
 1778         struct mld_hdr          *mld;
 1779 
 1780         NET_EPOCH_ASSERT();
 1781         IN6_MULTI_LIST_LOCK_ASSERT();
 1782         MLD_LOCK_ASSERT();
 1783 
 1784         ifp = in6m->in6m_ifp;
 1785         /* in process of being freed */
 1786         if (ifp == NULL)
 1787                 return (0);
 1788         ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
 1789         /* ia may be NULL if link-local address is tentative. */
 1790 
 1791         mh = m_gethdr(M_NOWAIT, MT_DATA);
 1792         if (mh == NULL) {
 1793                 if (ia != NULL)
 1794                         ifa_free(&ia->ia_ifa);
 1795                 return (ENOMEM);
 1796         }
 1797         md = m_get(M_NOWAIT, MT_DATA);
 1798         if (md == NULL) {
 1799                 m_free(mh);
 1800                 if (ia != NULL)
 1801                         ifa_free(&ia->ia_ifa);
 1802                 return (ENOMEM);
 1803         }
 1804         mh->m_next = md;
 1805 
 1806         /*
 1807          * FUTURE: Consider increasing alignment by ETHER_HDR_LEN, so
 1808          * that ether_output() does not need to allocate another mbuf
 1809          * for the header in the most common case.
 1810          */
 1811         M_ALIGN(mh, sizeof(struct ip6_hdr));
 1812         mh->m_pkthdr.len = sizeof(struct ip6_hdr) + sizeof(struct mld_hdr);
 1813         mh->m_len = sizeof(struct ip6_hdr);
 1814 
 1815         ip6 = mtod(mh, struct ip6_hdr *);
 1816         ip6->ip6_flow = 0;
 1817         ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
 1818         ip6->ip6_vfc |= IPV6_VERSION;
 1819         ip6->ip6_nxt = IPPROTO_ICMPV6;
 1820         ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
 1821         ip6->ip6_dst = in6m->in6m_addr;
 1822 
 1823         md->m_len = sizeof(struct mld_hdr);
 1824         mld = mtod(md, struct mld_hdr *);
 1825         mld->mld_type = type;
 1826         mld->mld_code = 0;
 1827         mld->mld_cksum = 0;
 1828         mld->mld_maxdelay = 0;
 1829         mld->mld_reserved = 0;
 1830         mld->mld_addr = in6m->in6m_addr;
 1831         in6_clearscope(&mld->mld_addr);
 1832         mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6,
 1833             sizeof(struct ip6_hdr), sizeof(struct mld_hdr));
 1834 
 1835         mld_save_context(mh, ifp);
 1836         mh->m_flags |= M_MLDV1;
 1837 
 1838         mld_dispatch_packet(mh);
 1839 
 1840         if (ia != NULL)
 1841                 ifa_free(&ia->ia_ifa);
 1842         return (0);
 1843 }
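
/*
 * On-the-wire sketch of the MLDv1 message built above: a 40-byte IPv6
 * header (source = the interface's link-local address, or :: if none
 * is usable; destination = the group being reported) followed by the
 * 24-byte MLD header carrying type 131 (MLD_LISTENER_REPORT) or 132
 * (MLD_LISTENER_DONE), code 0 and maximum-response delay 0.  The
 * in6_cksum() call computes the ICMPv6 checksum over the MLD header
 * plus the IPv6 pseudo-header.
 */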
 1844 
 1845 /*
 1846  * Process a state change from the upper layer for the given IPv6 group.
 1847  *
 1848  * Each socket holds a reference on the in6_multi in its own ip6_moptions.
 1849  * The socket layer will have made the necessary updates to the group
 1850  * state, it is now up to MLD to issue a state change report if there
 1851  * has been any change between T0 (when the last state-change was issued)
 1852  * and T1 (now).
 1853  *
 1854  * We use the MLDv2 state machine at group level. The MLD module,
 1855  * however, makes the decision as to which MLD protocol version to speak.
 1856  * A state change *from* INCLUDE {} always means an initial join.
 1857  * A state change *to* INCLUDE {} always means a final leave.
 1858  *
 1859  * If delay is non-zero, and the state change is an initial multicast
 1860  * join, the state change report will be delayed by 'delay' ticks
 1861  * in units of MLD_FASTHZ if MLDv1 is active on the link; otherwise
 1862  * the initial MLDv2 state change report will be delayed by whichever
 1863  * is sooner, a pending state-change timer or delay itself.
 1864  *
 1865  * VIMAGE: curvnet should have been set by caller, as this routine
 1866  * is called from the socket option handlers.
 1867  */
 1868 int
 1869 mld_change_state(struct in6_multi *inm, const int delay)
 1870 {
 1871         struct mld_ifsoftc *mli;
 1872         struct ifnet *ifp;
 1873         int error;
 1874 
 1875         IN6_MULTI_LIST_LOCK_ASSERT();
 1876 
 1877         error = 0;
 1878 
 1879         /*
 1880          * Check if the in6_multi has already been disconnected.
 1881          */
 1882         if (inm->in6m_ifp == NULL) {
 1883                 CTR1(KTR_MLD, "%s: inm is disconnected", __func__);
 1884                 return (0);
 1885         }
 1886 
 1887         /*
 1888          * Try to detect if the upper layer just asked us to change state
 1889          * for an interface which has now gone away.
 1890          */
 1891         KASSERT(inm->in6m_ifma != NULL, ("%s: no ifma", __func__));
 1892         ifp = inm->in6m_ifma->ifma_ifp;
 1893         if (ifp == NULL)
 1894                 return (0);
 1895         /*
 1896          * Sanity check that netinet6's notion of ifp is the
 1897          * same as net's.
 1898          */
 1899         KASSERT(inm->in6m_ifp == ifp, ("%s: bad ifp", __func__));
 1900 
 1901         MLD_LOCK();
 1902         mli = MLD_IFINFO(ifp);
 1903         KASSERT(mli != NULL, ("%s: no mld_ifsoftc for ifp %p", __func__, ifp));
 1904 
 1905         /*
 1906          * If we detect a state transition to or from MCAST_UNDEFINED
 1907          * for this group, then we are starting or finishing an MLD
 1908          * life cycle for this group.
 1909          */
 1910         if (inm->in6m_st[1].iss_fmode != inm->in6m_st[0].iss_fmode) {
 1911                 CTR3(KTR_MLD, "%s: inm transition %d -> %d", __func__,
 1912                     inm->in6m_st[0].iss_fmode, inm->in6m_st[1].iss_fmode);
 1913                 if (inm->in6m_st[0].iss_fmode == MCAST_UNDEFINED) {
 1914                         CTR1(KTR_MLD, "%s: initial join", __func__);
 1915                         error = mld_initial_join(inm, mli, delay);
 1916                         goto out_locked;
 1917                 } else if (inm->in6m_st[1].iss_fmode == MCAST_UNDEFINED) {
 1918                         CTR1(KTR_MLD, "%s: final leave", __func__);
 1919                         mld_final_leave(inm, mli);
 1920                         goto out_locked;
 1921                 }
 1922         } else {
 1923                 CTR1(KTR_MLD, "%s: filter set change", __func__);
 1924         }
 1925 
 1926         error = mld_handle_state_change(inm, mli);
 1927 
 1928 out_locked:
 1929         MLD_UNLOCK();
 1930         return (error);
 1931 }
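
/*
 * Illustrative caller pattern (a sketch only; the real callers are the
 * join/leave and source-filter paths in in6_mcast.c, and the exact
 * locking shown here is an assumption):
 *
 *      IN6_MULTI_LIST_LOCK();
 *      error = mld_change_state(inm, 0);       /- 0: no extra delay -/
 *      IN6_MULTI_LIST_UNLOCK();
 *
 * The function compares the group's T0 and T1 filter state to decide
 * between an initial join, a final leave, or an in-place filter change.
 */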
 1932 
 1933 /*
 1934  * Perform the initial join for an MLD group.
 1935  *
 1936  * When joining a group:
 1937  *  If the group should have its MLD traffic suppressed, do nothing.
 1938  *  MLDv1 starts sending MLDv1 host membership reports.
 1939  *  MLDv2 will schedule an MLDv2 state-change report containing the
 1940  *  initial state of the membership.
 1941  *
 1942  * If the delay argument is non-zero, then we must delay sending the
 1943  * initial state change for delay ticks (in units of MLD_FASTHZ).
 1944  */
 1945 static int
 1946 mld_initial_join(struct in6_multi *inm, struct mld_ifsoftc *mli,
 1947     const int delay)
 1948 {
 1949         struct epoch_tracker     et;
 1950         struct ifnet            *ifp;
 1951         struct mbufq            *mq;
 1952         int                      error, retval, syncstates;
 1953         int                      odelay;
 1954 #ifdef KTR
 1955         char                     ip6tbuf[INET6_ADDRSTRLEN];
 1956 #endif
 1957 
 1958         CTR4(KTR_MLD, "%s: initial join %s on ifp %p(%s)",
 1959             __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr),
 1960             inm->in6m_ifp, if_name(inm->in6m_ifp));
 1961 
 1962         error = 0;
 1963         syncstates = 1;
 1964 
 1965         ifp = inm->in6m_ifp;
 1966 
 1967         IN6_MULTI_LIST_LOCK_ASSERT();
 1968         MLD_LOCK_ASSERT();
 1969 
 1970         KASSERT(mli && mli->mli_ifp == ifp, ("%s: inconsistent ifp", __func__));
 1971 
 1972         /*
 1973          * Groups joined on loopback or marked as 'not reported',
 1974          * enter the MLD_SILENT_MEMBER state and
 1975          * are never reported in any protocol exchanges.
 1976          * All other groups enter the appropriate state machine
 1977          * for the version in use on this link.
 1978          * A link marked as MLIF_SILENT causes MLD to be completely
 1979          * disabled for the link.
 1980          */
 1981         if ((ifp->if_flags & IFF_LOOPBACK) ||
 1982             (mli->mli_flags & MLIF_SILENT) ||
 1983             !mld_is_addr_reported(&inm->in6m_addr)) {
 1984                 CTR1(KTR_MLD,
 1985 "%s: not kicking state machine for silent group", __func__);
 1986                 inm->in6m_state = MLD_SILENT_MEMBER;
 1987                 inm->in6m_timer = 0;
 1988         } else {
 1989                 /*
 1990                  * Deal with overlapping in6_multi lifecycle.
 1991                  * If this group was LEAVING, then make sure
 1992                  * we drop the reference we picked up to keep the
 1993                  * group around for the final INCLUDE {} enqueue.
 1994                  */
 1995                 if (mli->mli_version == MLD_VERSION_2 &&
 1996                     inm->in6m_state == MLD_LEAVING_MEMBER) {
 1997                         inm->in6m_refcount--;
 1998                         MPASS(inm->in6m_refcount > 0);
 1999                 }
 2000                 inm->in6m_state = MLD_REPORTING_MEMBER;
 2001 
 2002                 switch (mli->mli_version) {
 2003                 case MLD_VERSION_1:
 2004                         /*
 2005                          * If a delay was provided, only use it if
 2006                          * it is greater than the delay normally
 2007                          * used for an MLDv1 state change report,
 2008                          * and delay sending the initial MLDv1 report
 2009                          * by not transitioning to the IDLE state.
 2010                          */
 2011                         odelay = MLD_RANDOM_DELAY(MLD_V1_MAX_RI * MLD_FASTHZ);
 2012                         if (delay) {
 2013                                 inm->in6m_timer = max(delay, odelay);
 2014                                 V_current_state_timers_running6 = 1;
 2015                         } else {
 2016                                 inm->in6m_state = MLD_IDLE_MEMBER;
 2017                                 NET_EPOCH_ENTER(et);
 2018                                 error = mld_v1_transmit_report(inm,
 2019                                      MLD_LISTENER_REPORT);
 2020                                 NET_EPOCH_EXIT(et);
 2021                                 if (error == 0) {
 2022                                         inm->in6m_timer = odelay;
 2023                                         V_current_state_timers_running6 = 1;
 2024                                 }
 2025                         }
 2026                         break;
 2027 
 2028                 case MLD_VERSION_2:
 2029                         /*
 2030                          * Defer update of T0 to T1, until the first copy
 2031                          * of the state change has been transmitted.
 2032                          */
 2033                         syncstates = 0;
 2034 
 2035                         /*
 2036                          * Immediately enqueue a State-Change Report for
 2037                          * this interface, freeing any previous reports.
 2038                          * Don't kick the timers if there is nothing to do,
 2039                          * or if an error occurred.
 2040                          */
 2041                         mq = &inm->in6m_scq;
 2042                         mbufq_drain(mq);
 2043                         retval = mld_v2_enqueue_group_record(mq, inm, 1,
 2044                             0, 0, (mli->mli_flags & MLIF_USEALLOW));
 2045                         CTR2(KTR_MLD, "%s: enqueue record = %d",
 2046                             __func__, retval);
 2047                         if (retval <= 0) {
 2048                                 error = retval * -1;
 2049                                 break;
 2050                         }
 2051 
 2052                         /*
 2053                          * Schedule transmission of pending state-change
 2054                          * report up to RV times for this link. The timer
 2055                          * will fire at the next mld_fasttimo (~200ms),
 2056                          * giving us an opportunity to merge the reports.
 2057                          *
 2058                          * If a delay was provided to this function, only
 2059                          * use this delay if sooner than the existing one.
 2060                          */
 2061                         KASSERT(mli->mli_rv > 1,
 2062                            ("%s: invalid robustness %d", __func__,
 2063                             mli->mli_rv));
 2064                         inm->in6m_scrv = mli->mli_rv;
 2065                         if (delay) {
 2066                                 if (inm->in6m_sctimer > 1) {
 2067                                         inm->in6m_sctimer =
 2068                                             min(inm->in6m_sctimer, delay);
 2069                                 } else
 2070                                         inm->in6m_sctimer = delay;
 2071                         } else
 2072                                 inm->in6m_sctimer = 1;
 2073                         V_state_change_timers_running6 = 1;
 2074 
 2075                         error = 0;
 2076                         break;
 2077                 }
 2078         }
 2079 
 2080         /*
 2081          * Only update the T0 state if state change is atomic,
 2082          * i.e. we don't need to wait for a timer to fire before we
 2083          * can consider the state change to have been communicated.
 2084          */
 2085         if (syncstates) {
 2086                 in6m_commit(inm);
 2087                 CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__,
 2088                     ip6_sprintf(ip6tbuf, &inm->in6m_addr),
 2089                     if_name(inm->in6m_ifp));
 2090         }
 2091 
 2092         return (error);
 2093 }
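
/*
 * Note on the MLDv1 branch of mld_initial_join() above: odelay is a
 * random delay of up to MLD_V1_MAX_RI seconds (nominally 10 s, the
 * MLDv1 maximum response interval) expressed in fast-timer ticks.
 * When the caller supplies a delay, the larger of the two is used as
 * the report timer; otherwise the report is sent immediately and
 * odelay schedules the follow-up transmission.
 */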
 2094 
 2095 /*
 2096  * Issue an intermediate state change during the life-cycle.
 2097  */
 2098 static int
 2099 mld_handle_state_change(struct in6_multi *inm, struct mld_ifsoftc *mli)
 2100 {
 2101         struct ifnet            *ifp;
 2102         int                      retval;
 2103 #ifdef KTR
 2104         char                     ip6tbuf[INET6_ADDRSTRLEN];
 2105 #endif
 2106 
 2107         CTR4(KTR_MLD, "%s: state change for %s on ifp %p(%s)",
 2108             __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr),
 2109             inm->in6m_ifp, if_name(inm->in6m_ifp));
 2110 
 2111         ifp = inm->in6m_ifp;
 2112 
 2113         IN6_MULTI_LIST_LOCK_ASSERT();
 2114         MLD_LOCK_ASSERT();
 2115 
 2116         KASSERT(mli && mli->mli_ifp == ifp,
 2117             ("%s: inconsistent ifp", __func__));
 2118 
 2119         if ((ifp->if_flags & IFF_LOOPBACK) ||
 2120             (mli->mli_flags & MLIF_SILENT) ||
 2121             !mld_is_addr_reported(&inm->in6m_addr) ||
 2122             (mli->mli_version != MLD_VERSION_2)) {
 2123                 if (!mld_is_addr_reported(&inm->in6m_addr)) {
 2124                         CTR1(KTR_MLD,
 2125 "%s: not kicking state machine for silent group", __func__);
 2126                 }
 2127                 CTR1(KTR_MLD, "%s: nothing to do", __func__);
 2128                 in6m_commit(inm);
 2129                 CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__,
 2130                     ip6_sprintf(ip6tbuf, &inm->in6m_addr),
 2131                     if_name(inm->in6m_ifp));
 2132                 return (0);
 2133         }
 2134 
 2135         mbufq_drain(&inm->in6m_scq);
 2136 
 2137         retval = mld_v2_enqueue_group_record(&inm->in6m_scq, inm, 1, 0, 0,
 2138             (mli->mli_flags & MLIF_USEALLOW));
 2139         CTR2(KTR_MLD, "%s: enqueue record = %d", __func__, retval);
 2140         if (retval <= 0)
 2141                 return (-retval);
 2142 
 2143         /*
 2144          * If record(s) were enqueued, start the state-change
 2145          * report timer for this group.
 2146          */
 2147         inm->in6m_scrv = mli->mli_rv;
 2148         inm->in6m_sctimer = 1;
 2149         V_state_change_timers_running6 = 1;
 2150 
 2151         return (0);
 2152 }
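
/*
 * Note: in6m_scrv is loaded from the link's Robustness Variable
 * (2 by default per RFC 3810), so the state-change report enqueued
 * above will be transmitted up to that many times; setting
 * in6m_sctimer to 1 means the first transmission happens on the next
 * fast timeout, i.e. within roughly 200 ms.
 */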
 2153 
 2154 /*
 2155  * Perform the final leave for a multicast address.
 2156  *
 2157  * When leaving a group:
 2158  *  MLDv1 sends a DONE message, if and only if we are the reporter.
 2159  *  MLDv2 enqueues a state-change report containing a transition
 2160  *  to INCLUDE {} for immediate transmission.
 2161  */
 2162 static void
 2163 mld_final_leave(struct in6_multi *inm, struct mld_ifsoftc *mli)
 2164 {
 2165         struct epoch_tracker     et;
 2166         int syncstates;
 2167 #ifdef KTR
 2168         char ip6tbuf[INET6_ADDRSTRLEN];
 2169 #endif
 2170 
 2171         syncstates = 1;
 2172 
 2173         CTR4(KTR_MLD, "%s: final leave %s on ifp %p(%s)",
 2174             __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr),
 2175             inm->in6m_ifp, if_name(inm->in6m_ifp));
 2176 
 2177         IN6_MULTI_LIST_LOCK_ASSERT();
 2178         MLD_LOCK_ASSERT();
 2179 
 2180         switch (inm->in6m_state) {
 2181         case MLD_NOT_MEMBER:
 2182         case MLD_SILENT_MEMBER:
 2183         case MLD_LEAVING_MEMBER:
 2184                 /* Already leaving or left; do nothing. */
 2185                 CTR1(KTR_MLD,
 2186 "%s: not kicking state machine for silent group", __func__);
 2187                 break;
 2188         case MLD_REPORTING_MEMBER:
 2189         case MLD_IDLE_MEMBER:
 2190         case MLD_G_QUERY_PENDING_MEMBER:
 2191         case MLD_SG_QUERY_PENDING_MEMBER:
 2192                 if (mli->mli_version == MLD_VERSION_1) {
 2193 #ifdef INVARIANTS
 2194                         if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER ||
 2195                             inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER)
 2196                                 panic("%s: MLDv2 state reached, not MLDv2 mode",
 2197                                     __func__);
 2198 #endif
 2199                         NET_EPOCH_ENTER(et);
 2200                         mld_v1_transmit_report(inm, MLD_LISTENER_DONE);
 2201                         NET_EPOCH_EXIT(et);
 2202                         inm->in6m_state = MLD_NOT_MEMBER;
 2203                         V_current_state_timers_running6 = 1;
 2204                 } else if (mli->mli_version == MLD_VERSION_2) {
 2205                         /*
 2206                          * Stop group timer and all pending reports.
 2207                          * Immediately enqueue a state-change report
 2208                          * TO_IN {} to be sent on the next fast timeout,
 2209                          * giving us an opportunity to merge reports.
 2210                          */
 2211                         mbufq_drain(&inm->in6m_scq);
 2212                         inm->in6m_timer = 0;
 2213                         inm->in6m_scrv = mli->mli_rv;
 2214                         CTR4(KTR_MLD, "%s: Leaving %s/%s with %d "
 2215                             "pending retransmissions.", __func__,
 2216                             ip6_sprintf(ip6tbuf, &inm->in6m_addr),
 2217                             if_name(inm->in6m_ifp), inm->in6m_scrv);
 2218                         if (inm->in6m_scrv == 0) {
 2219                                 inm->in6m_state = MLD_NOT_MEMBER;
 2220                                 inm->in6m_sctimer = 0;
 2221                         } else {
 2222                                 int retval __diagused;
 2223 
 2224                                 in6m_acquire_locked(inm);
 2225 
 2226                                 retval = mld_v2_enqueue_group_record(
 2227                                     &inm->in6m_scq, inm, 1, 0, 0,
 2228                                     (mli->mli_flags & MLIF_USEALLOW));
 2229                                 KASSERT(retval != 0,
 2230                                     ("%s: enqueue record = %d", __func__,
 2231                                      retval));
 2232 
 2233                                 inm->in6m_state = MLD_LEAVING_MEMBER;
 2234                                 inm->in6m_sctimer = 1;
 2235                                 V_state_change_timers_running6 = 1;
 2236                                 syncstates = 0;
 2237                         }
 2238                         break;
 2239                 }
 2240                 break;
 2241         case MLD_LAZY_MEMBER:
 2242         case MLD_SLEEPING_MEMBER:
 2243         case MLD_AWAKENING_MEMBER:
 2244                 /* Our reports are suppressed; do nothing. */
 2245                 break;
 2246         }
 2247 
 2248         if (syncstates) {
 2249                 in6m_commit(inm);
 2250                 CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__,
 2251                     ip6_sprintf(ip6tbuf, &inm->in6m_addr),
 2252                     if_name(inm->in6m_ifp));
 2253                 inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED;
 2254                 CTR3(KTR_MLD, "%s: T1 now MCAST_UNDEFINED for %p/%s",
 2255                     __func__, &inm->in6m_addr, if_name(inm->in6m_ifp));
 2256         }
 2257 }
 2258 
 2259 /*
 2260  * Enqueue an MLDv2 group record to the given output queue.
 2261  *
 2262  * If is_state_change is zero, a current-state record is appended.
 2263  * If is_state_change is non-zero, a state-change report is appended.
 2264  *
 2265  * If is_group_query is non-zero, an mbuf packet chain is allocated.
 2266  * If is_group_query is zero, and if there is a packet with free space
 2267  * at the tail of the queue, it will be appended to, provided there
 2268  * is enough free space.
 2269  * Otherwise a new mbuf packet chain is allocated.
 2270  *
 2271  * If is_source_query is non-zero, each source is checked to see if
 2272  * it was recorded for a Group-Source query, and will be omitted if
 2273  * it is not both in-mode and recorded.
 2274  *
 2275  * If use_block_allow is non-zero, state change reports for initial join
 2276  * and final leave, on an inclusive mode group with a source list, will be
 2277  * rewritten to use the ALLOW_NEW and BLOCK_OLD record types, respectively.
 2278  *
 2279  * The function will attempt to allocate leading space in the packet
 2280  * for the IPv6+ICMP headers to be prepended without fragmenting the chain.
 2281  *
 2282  * If successful the size of all data appended to the queue is returned,
 2283  * otherwise an error code less than zero is returned, or zero if
 2284  * no record(s) were appended.
 2285  */
 2286 static int
 2287 mld_v2_enqueue_group_record(struct mbufq *mq, struct in6_multi *inm,
 2288     const int is_state_change, const int is_group_query,
 2289     const int is_source_query, const int use_block_allow)
 2290 {
 2291         struct mldv2_record      mr;
 2292         struct mldv2_record     *pmr;
 2293         struct ifnet            *ifp;
 2294         struct ip6_msource      *ims, *nims;
 2295         struct mbuf             *m0, *m, *md;
 2296         int                      is_filter_list_change;
 2297         int                      minrec0len, m0srcs, msrcs, nbytes, off;
 2298         int                      record_has_sources;
 2299         int                      now;
 2300         int                      type;
 2301         uint8_t                  mode;
 2302 #ifdef KTR
 2303         char                     ip6tbuf[INET6_ADDRSTRLEN];
 2304 #endif
 2305 
 2306         IN6_MULTI_LIST_LOCK_ASSERT();
 2307 
 2308         ifp = inm->in6m_ifp;
 2309         is_filter_list_change = 0;
 2310         m = NULL;
 2311         m0 = NULL;
 2312         m0srcs = 0;
 2313         msrcs = 0;
 2314         nbytes = 0;
 2315         nims = NULL;
 2316         record_has_sources = 1;
 2317         pmr = NULL;
 2318         type = MLD_DO_NOTHING;
 2319         mode = inm->in6m_st[1].iss_fmode;
 2320 
 2321         /*
 2322          * If we did not transition out of ASM mode during t0->t1,
 2323          * and there are no source nodes to process, we can skip
 2324          * the generation of source records.
 2325          */
 2326         if (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0 &&
 2327             inm->in6m_nsrc == 0)
 2328                 record_has_sources = 0;
 2329 
 2330         if (is_state_change) {
 2331                 /*
 2332                  * Queue a state change record.
 2333                  * If the mode did not change, and there are non-ASM
 2334                  * listeners or source filters present,
 2335                  * we potentially need to issue two records for the group.
 2336                  * If there are ASM listeners, and there was no filter
 2337                  * mode transition of any kind, do nothing.
 2338                  *
 2339                  * If we are transitioning to MCAST_UNDEFINED, we need
 2340                  * not send any sources. A transition to/from this state is
 2341                  * considered inclusive with some special treatment.
 2342                  *
 2343                  * If we are rewriting initial joins/leaves to use
 2344                  * ALLOW/BLOCK, and the group's membership is inclusive,
 2345                  * we need to send sources in all cases.
 2346                  */
 2347                 if (mode != inm->in6m_st[0].iss_fmode) {
 2348                         if (mode == MCAST_EXCLUDE) {
 2349                                 CTR1(KTR_MLD, "%s: change to EXCLUDE",
 2350                                     __func__);
 2351                                 type = MLD_CHANGE_TO_EXCLUDE_MODE;
 2352                         } else {
 2353                                 CTR1(KTR_MLD, "%s: change to INCLUDE",
 2354                                     __func__);
 2355                                 if (use_block_allow) {
 2356                                         /*
 2357                                          * XXX
 2358                                          * Here we're interested in state
 2359                                          * edges either direction between
 2360                                          * MCAST_UNDEFINED and MCAST_INCLUDE.
 2361                                          * Perhaps we should just check
 2362                                          * the group state, rather than
 2363                                          * the filter mode.
 2364                                          */
 2365                                         if (mode == MCAST_UNDEFINED) {
 2366                                                 type = MLD_BLOCK_OLD_SOURCES;
 2367                                         } else {
 2368                                                 type = MLD_ALLOW_NEW_SOURCES;
 2369                                         }
 2370                                 } else {
 2371                                         type = MLD_CHANGE_TO_INCLUDE_MODE;
 2372                                         if (mode == MCAST_UNDEFINED)
 2373                                                 record_has_sources = 0;
 2374                                 }
 2375                         }
 2376                 } else {
 2377                         if (record_has_sources) {
 2378                                 is_filter_list_change = 1;
 2379                         } else {
 2380                                 type = MLD_DO_NOTHING;
 2381                         }
 2382                 }
 2383         } else {
 2384                 /*
 2385                  * Queue a current state record.
 2386                  */
 2387                 if (mode == MCAST_EXCLUDE) {
 2388                         type = MLD_MODE_IS_EXCLUDE;
 2389                 } else if (mode == MCAST_INCLUDE) {
 2390                         type = MLD_MODE_IS_INCLUDE;
 2391                         KASSERT(inm->in6m_st[1].iss_asm == 0,
 2392                             ("%s: inm %p is INCLUDE but ASM count is %d",
 2393                              __func__, inm, inm->in6m_st[1].iss_asm));
 2394                 }
 2395         }
 2396 
 2397         /*
 2398          * Generate the filter list changes using a separate function.
 2399          */
 2400         if (is_filter_list_change)
 2401                 return (mld_v2_enqueue_filter_change(mq, inm));
 2402 
 2403         if (type == MLD_DO_NOTHING) {
 2404                 CTR3(KTR_MLD, "%s: nothing to do for %s/%s",
 2405                     __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr),
 2406                     if_name(inm->in6m_ifp));
 2407                 return (0);
 2408         }
 2409 
 2410         /*
 2411          * If any sources are present, we must be able to fit at least
 2412          * one in the trailing space of the tail packet's mbuf,
 2413          * ideally more.
 2414          */
 2415         minrec0len = sizeof(struct mldv2_record);
 2416         if (record_has_sources)
 2417                 minrec0len += sizeof(struct in6_addr);
 2418 
 2419         CTR4(KTR_MLD, "%s: queueing %s for %s/%s", __func__,
 2420             mld_rec_type_to_str(type),
 2421             ip6_sprintf(ip6tbuf, &inm->in6m_addr),
 2422             if_name(inm->in6m_ifp));
 2423 
 2424         /*
 2425          * Check if we have a packet in the tail of the queue for this
 2426          * group into which the first group record for this group will fit.
 2427          * Otherwise allocate a new packet.
 2428          * Always allocate leading space for IP6+RA+ICMPV6+REPORT.
 2429          * Note: Group records for G/GSR query responses MUST be sent
 2430          * in their own packet.
 2431          */
 2432         m0 = mbufq_last(mq);
 2433         if (!is_group_query &&
 2434             m0 != NULL &&
 2435             (m0->m_pkthdr.vt_nrecs + 1 <= MLD_V2_REPORT_MAXRECS) &&
 2436             (m0->m_pkthdr.len + minrec0len) <
 2437              (ifp->if_mtu - MLD_MTUSPACE)) {
 2438                 m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
 2439                             sizeof(struct mldv2_record)) /
 2440                             sizeof(struct in6_addr);
 2441                 m = m0;
 2442                 CTR1(KTR_MLD, "%s: use existing packet", __func__);
 2443         } else {
 2444                 if (mbufq_full(mq)) {
 2445                         CTR1(KTR_MLD, "%s: outbound queue full", __func__);
 2446                         return (-ENOMEM);
 2447                 }
 2448                 m = NULL;
 2449                 m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
 2450                     sizeof(struct mldv2_record)) / sizeof(struct in6_addr);
 2451                 if (!is_state_change && !is_group_query)
 2452                         m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 2453                 if (m == NULL)
 2454                         m = m_gethdr(M_NOWAIT, MT_DATA);
 2455                 if (m == NULL)
 2456                         return (-ENOMEM);
 2457 
 2458                 mld_save_context(m, ifp);
 2459 
 2460                 CTR1(KTR_MLD, "%s: allocated first packet", __func__);
 2461         }
 2462 
 2463         /*
 2464          * Append group record.
 2465          * If we have sources, we don't know how many yet.
 2466          */
 2467         mr.mr_type = type;
 2468         mr.mr_datalen = 0;
 2469         mr.mr_numsrc = 0;
 2470         mr.mr_addr = inm->in6m_addr;
 2471         in6_clearscope(&mr.mr_addr);
 2472         if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) {
 2473                 if (m != m0)
 2474                         m_freem(m);
 2475                 CTR1(KTR_MLD, "%s: m_append() failed.", __func__);
 2476                 return (-ENOMEM);
 2477         }
 2478         nbytes += sizeof(struct mldv2_record);
 2479 
 2480         /*
 2481          * Append as many sources as will fit in the first packet.
 2482          * If we are appending to a new packet, the chain allocation
 2483          * may potentially use clusters; use m_getptr() in this case.
 2484          * If we are appending to an existing packet, we need to obtain
 2485          * a pointer to the group record after m_append(), in case a new
 2486          * mbuf was allocated.
 2487          *
 2488          * Only append sources which are in-mode at t1. If we are
 2489          * transitioning to MCAST_UNDEFINED state on the group, and
 2490          * use_block_allow is zero, do not include source entries.
 2491          * Otherwise, we need to include this source in the report.
 2492          *
 2493          * Only report recorded sources in our filter set when responding
 2494          * to a group-source query.
 2495          */
 2496         if (record_has_sources) {
 2497                 if (m == m0) {
 2498                         md = m_last(m);
 2499                         pmr = (struct mldv2_record *)(mtod(md, uint8_t *) +
 2500                             md->m_len - nbytes);
 2501                 } else {
 2502                         md = m_getptr(m, 0, &off);
 2503                         pmr = (struct mldv2_record *)(mtod(md, uint8_t *) +
 2504                             off);
 2505                 }
 2506                 msrcs = 0;
 2507                 RB_FOREACH_SAFE(ims, ip6_msource_tree, &inm->in6m_srcs,
 2508                     nims) {
 2509                         CTR2(KTR_MLD, "%s: visit node %s", __func__,
 2510                             ip6_sprintf(ip6tbuf, &ims->im6s_addr));
 2511                         now = im6s_get_mode(inm, ims, 1);
 2512                         CTR2(KTR_MLD, "%s: node is %d", __func__, now);
 2513                         if ((now != mode) ||
 2514                             (now == mode &&
 2515                              (!use_block_allow && mode == MCAST_UNDEFINED))) {
 2516                                 CTR1(KTR_MLD, "%s: skip node", __func__);
 2517                                 continue;
 2518                         }
 2519                         if (is_source_query && ims->im6s_stp == 0) {
 2520                                 CTR1(KTR_MLD, "%s: skip unrecorded node",
 2521                                     __func__);
 2522                                 continue;
 2523                         }
 2524                         CTR1(KTR_MLD, "%s: append node", __func__);
 2525                         if (!m_append(m, sizeof(struct in6_addr),
 2526                             (void *)&ims->im6s_addr)) {
 2527                                 if (m != m0)
 2528                                         m_freem(m);
 2529                                 CTR1(KTR_MLD, "%s: m_append() failed.",
 2530                                     __func__);
 2531                                 return (-ENOMEM);
 2532                         }
 2533                         nbytes += sizeof(struct in6_addr);
 2534                         ++msrcs;
 2535                         if (msrcs == m0srcs)
 2536                                 break;
 2537                 }
 2538                 CTR2(KTR_MLD, "%s: msrcs is %d this packet", __func__,
 2539                     msrcs);
 2540                 pmr->mr_numsrc = htons(msrcs);
 2541                 nbytes += (msrcs * sizeof(struct in6_addr));
 2542         }
 2543 
 2544         if (is_source_query && msrcs == 0) {
 2545                 CTR1(KTR_MLD, "%s: no recorded sources to report", __func__);
 2546                 if (m != m0)
 2547                         m_freem(m);
 2548                 return (0);
 2549         }
 2550 
 2551         /*
 2552          * We are good to go with first packet.
 2553          */
 2554         if (m != m0) {
 2555                 CTR1(KTR_MLD, "%s: enqueueing first packet", __func__);
 2556                 m->m_pkthdr.vt_nrecs = 1;
 2557                 mbufq_enqueue(mq, m);
 2558         } else
 2559                 m->m_pkthdr.vt_nrecs++;
 2560 
 2561         /*
 2562          * No further work needed if no source list in packet(s).
 2563          */
 2564         if (!record_has_sources)
 2565                 return (nbytes);
 2566 
 2567         /*
 2568          * Whilst sources remain to be announced, we need to allocate
 2569          * a new packet and fill out as many sources as will fit.
 2570          * Always try for a cluster first.
 2571          */
 2572         while (nims != NULL) {
 2573                 if (mbufq_full(mq)) {
 2574                         CTR1(KTR_MLD, "%s: outbound queue full", __func__);
 2575                         return (-ENOMEM);
 2576                 }
 2577                 m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 2578                 if (m == NULL)
 2579                         m = m_gethdr(M_NOWAIT, MT_DATA);
 2580                 if (m == NULL)
 2581                         return (-ENOMEM);
 2582                 mld_save_context(m, ifp);
 2583                 md = m_getptr(m, 0, &off);
 2584                 pmr = (struct mldv2_record *)(mtod(md, uint8_t *) + off);
 2585                 CTR1(KTR_MLD, "%s: allocated next packet", __func__);
 2586 
 2587                 if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) {
 2588                         if (m != m0)
 2589                                 m_freem(m);
 2590                         CTR1(KTR_MLD, "%s: m_append() failed.", __func__);
 2591                         return (-ENOMEM);
 2592                 }
 2593                 m->m_pkthdr.vt_nrecs = 1;
 2594                 nbytes += sizeof(struct mldv2_record);
 2595 
 2596                 m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
 2597                     sizeof(struct mldv2_record)) / sizeof(struct in6_addr);
 2598 
 2599                 msrcs = 0;
 2600                 RB_FOREACH_FROM(ims, ip6_msource_tree, nims) {
 2601                         CTR2(KTR_MLD, "%s: visit node %s",
 2602                             __func__, ip6_sprintf(ip6tbuf, &ims->im6s_addr));
 2603                         now = im6s_get_mode(inm, ims, 1);
 2604                         if ((now != mode) ||
 2605                             (now == mode &&
 2606                              (!use_block_allow && mode == MCAST_UNDEFINED))) {
 2607                                 CTR1(KTR_MLD, "%s: skip node", __func__);
 2608                                 continue;
 2609                         }
 2610                         if (is_source_query && ims->im6s_stp == 0) {
 2611                                 CTR1(KTR_MLD, "%s: skip unrecorded node",
 2612                                     __func__);
 2613                                 continue;
 2614                         }
 2615                         CTR1(KTR_MLD, "%s: append node", __func__);
 2616                         if (!m_append(m, sizeof(struct in6_addr),
 2617                             (void *)&ims->im6s_addr)) {
 2618                                 if (m != m0)
 2619                                         m_freem(m);
 2620                                 CTR1(KTR_MLD, "%s: m_append() failed.",
 2621                                     __func__);
 2622                                 return (-ENOMEM);
 2623                         }
 2624                         ++msrcs;
 2625                         if (msrcs == m0srcs)
 2626                                 break;
 2627                 }
 2628                 pmr->mr_numsrc = htons(msrcs);
 2629                 nbytes += (msrcs * sizeof(struct in6_addr));
 2630 
 2631                 CTR1(KTR_MLD, "%s: enqueueing next packet", __func__);
 2632                 mbufq_enqueue(mq, m);
 2633         }
 2634 
 2635         return (nbytes);
 2636 }
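
/*
 * Sizing sketch for the arithmetic above (the exact MLD_MTUSPACE value
 * is defined earlier in this file; 56 bytes -- IPv6 header, hop-by-hop
 * Router Alert option and report header -- is assumed here): on a
 * 1500-byte MTU link, the first 20-byte group record can carry roughly
 * (1500 - 56 - 20) / 16 = 89 source addresses of 16 bytes each.
 * Sources that do not fit spill into follow-up packets, with at most
 * MLD_V2_REPORT_MAXRECS records per packet.
 */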
 2637 
 2638 /*
 2639  * Type used to mark record pass completion.
 2640  * We exploit the fact we can cast to this easily from the
 2641  * current filter modes on each ip_msource node.
 2642  * current filter modes on each ip6_msource node.
 2643 typedef enum {
 2644         REC_NONE = 0x00,        /* MCAST_UNDEFINED */
 2645         REC_ALLOW = 0x01,       /* MCAST_INCLUDE */
 2646         REC_BLOCK = 0x02,       /* MCAST_EXCLUDE */
 2647         REC_FULL = REC_ALLOW | REC_BLOCK
 2648 } rectype_t;
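      /*
       * Illustrative note, not part of the original mld6.c: the values are
       * chosen so that a node's MCAST_* filter mode can be cast straight to
       * its record-pass bit, assuming the usual FreeBSD definitions of
       * MCAST_UNDEFINED/MCAST_INCLUDE/MCAST_EXCLUDE as 0/1/2:
       *
       *	(rectype_t)MCAST_UNDEFINED == REC_NONE
       *	(rectype_t)MCAST_INCLUDE   == REC_ALLOW
       *	(rectype_t)MCAST_EXCLUDE   == REC_BLOCK
       *
       * REC_FULL is reached once both an ALLOW pass and a BLOCK pass have
       * completed.
       */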
 2649 
 2650 /*
 2651  * Enqueue an MLDv2 filter list change to the given output queue.
 2652  *
 2653  * Source list filter state is held in an RB-tree. When the filter list
 2654  * for a group is changed without changing its mode, we need to compute
 2655  * the deltas between T0 and T1 for each source in the filter set,
 2656  * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
 2657  *
 2658  * As we may potentially queue two record types, and the entire R-B tree
 2659  * needs to be walked at once, we break this out into its own function
 2660  * so we can generate a tightly packed queue of packets.
 2661  *
 2662  * XXX This could be written to only use one tree walk, although that makes
 2663  * serializing into the mbuf chains a bit harder. For now we do two walks
 2664  * which makes things easier on us, and it may or may not be harder on
 2665  * the L2 cache.
 2666  *
 2667  * On success, the size of all data appended to the queue is returned;
 2668  * zero is returned if no records were appended, or an error code
 2669  * less than zero on failure.
 2670  */
 2671 static int
 2672 mld_v2_enqueue_filter_change(struct mbufq *mq, struct in6_multi *inm)
 2673 {
 2674         static const int MINRECLEN =
 2675             sizeof(struct mldv2_record) + sizeof(struct in6_addr);
 2676         struct ifnet            *ifp;
 2677         struct mldv2_record      mr;
 2678         struct mldv2_record     *pmr;
 2679         struct ip6_msource      *ims, *nims;
 2680         struct mbuf             *m, *m0, *md;
 2681         int                      m0srcs, nbytes, npbytes, off, rsrcs, schanged;
 2682         uint8_t                  mode, now, then;
 2683         rectype_t                crt, drt, nrt;
 2684 #ifdef KTR
 2685         int                      nallow, nblock;
 2686         char                     ip6tbuf[INET6_ADDRSTRLEN];
 2687 #endif
 2688 
 2689         IN6_MULTI_LIST_LOCK_ASSERT();
 2690 
 2691         if (inm->in6m_nsrc == 0 ||
 2692             (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0))
 2693                 return (0);
 2694 
 2695         ifp = inm->in6m_ifp;                    /* interface */
 2696         mode = inm->in6m_st[1].iss_fmode;       /* filter mode at t1 */
 2697         crt = REC_NONE; /* current group record type */
 2698         drt = REC_NONE; /* mask of completed group record types */
 2699         nrt = REC_NONE; /* record type for current node */
 2700         m0srcs = 0;     /* # sources which will fit in current mbuf chain */
 2701         npbytes = 0;    /* # of bytes appended this packet */
 2702         nbytes = 0;     /* # of bytes appended to group's state-change queue */
 2703         rsrcs = 0;      /* # sources encoded in current record */
 2704         schanged = 0;   /* # nodes encoded in overall filter change */
 2705 #ifdef KTR
 2706         nallow = 0;     /* # of source entries in ALLOW_NEW */
 2707         nblock = 0;     /* # of source entries in BLOCK_OLD */
 2708 #endif
 2709         nims = NULL;    /* next tree node pointer */
 2710 
 2711         /*
 2712          * For each possible filter record mode.
 2713          * The first kind of source we encounter tells us which
 2714          * is the first kind of record we start appending.
 2715          * If a node transitioned to UNDEFINED at t1, its mode is treated
 2716          * as the inverse of the group's filter mode.
 2717          */
 2718         while (drt != REC_FULL) {
 2719                 do {
 2720                         m0 = mbufq_last(mq);
 2721                         if (m0 != NULL &&
 2722                             (m0->m_pkthdr.vt_nrecs + 1 <=
 2723                              MLD_V2_REPORT_MAXRECS) &&
 2724                             (m0->m_pkthdr.len + MINRECLEN) <
 2725                              (ifp->if_mtu - MLD_MTUSPACE)) {
 2726                                 m = m0;
 2727                                 m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
 2728                                             sizeof(struct mldv2_record)) /
 2729                                             sizeof(struct in6_addr);
 2730                                 CTR1(KTR_MLD,
 2731                                     "%s: use previous packet", __func__);
 2732                         } else {
 2733                                 m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 2734                                 if (m == NULL)
 2735                                         m = m_gethdr(M_NOWAIT, MT_DATA);
 2736                                 if (m == NULL) {
 2737                                         CTR1(KTR_MLD,
 2738                                             "%s: m_get*() failed", __func__);
 2739                                         return (-ENOMEM);
 2740                                 }
 2741                                 m->m_pkthdr.vt_nrecs = 0;
 2742                                 mld_save_context(m, ifp);
 2743                                 m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
 2744                                     sizeof(struct mldv2_record)) /
 2745                                     sizeof(struct in6_addr);
 2746                                 npbytes = 0;
 2747                                 CTR1(KTR_MLD,
 2748                                     "%s: allocated new packet", __func__);
 2749                         }
 2750                         /*
 2751                          * Append the MLD group record header to the
 2752                          * current packet's data area.
 2753                          * Recalculate pointer to free space for next
 2754                          * group record, in case m_append() allocated
 2755                          * a new mbuf or cluster.
 2756                          */
 2757                         memset(&mr, 0, sizeof(mr));
 2758                         mr.mr_addr = inm->in6m_addr;
 2759                         in6_clearscope(&mr.mr_addr);
 2760                         if (!m_append(m, sizeof(mr), (void *)&mr)) {
 2761                                 if (m != m0)
 2762                                         m_freem(m);
 2763                                 CTR1(KTR_MLD,
 2764                                     "%s: m_append() failed", __func__);
 2765                                 return (-ENOMEM);
 2766                         }
 2767                         npbytes += sizeof(struct mldv2_record);
 2768                         if (m != m0) {
 2769                                 /* new packet; offset in chain */
 2770                                 md = m_getptr(m, npbytes -
 2771                                     sizeof(struct mldv2_record), &off);
 2772                                 pmr = (struct mldv2_record *)(mtod(md,
 2773                                     uint8_t *) + off);
 2774                         } else {
 2775                                 /* current packet; offset from last append */
 2776                                 md = m_last(m);
 2777                                 pmr = (struct mldv2_record *)(mtod(md,
 2778                                     uint8_t *) + md->m_len -
 2779                                     sizeof(struct mldv2_record));
 2780                         }
 2781                         /*
 2782                          * Begin walking the tree for this record type
 2783                          * pass, or continue from where we left off
 2784                          * previously if we had to allocate a new packet.
 2785                          * Only report deltas in-mode at t1.
 2786                          * We need not report included sources as allowed
 2787                          * if we are in inclusive mode on the group,
 2788                          * however the converse is not true.
 2789                          */
 2790                         rsrcs = 0;
 2791                         if (nims == NULL) {
 2792                                 nims = RB_MIN(ip6_msource_tree,
 2793                                     &inm->in6m_srcs);
 2794                         }
 2795                         RB_FOREACH_FROM(ims, ip6_msource_tree, nims) {
 2796                                 CTR2(KTR_MLD, "%s: visit node %s", __func__,
 2797                                     ip6_sprintf(ip6tbuf, &ims->im6s_addr));
 2798                                 now = im6s_get_mode(inm, ims, 1);
 2799                                 then = im6s_get_mode(inm, ims, 0);
 2800                                 CTR3(KTR_MLD, "%s: mode: t0 %d, t1 %d",
 2801                                     __func__, then, now);
 2802                                 if (now == then) {
 2803                                         CTR1(KTR_MLD,
 2804                                             "%s: skip unchanged", __func__);
 2805                                         continue;
 2806                                 }
 2807                                 if (mode == MCAST_EXCLUDE &&
 2808                                     now == MCAST_INCLUDE) {
 2809                                         CTR1(KTR_MLD,
 2810                                             "%s: skip IN src on EX group",
 2811                                             __func__);
 2812                                         continue;
 2813                                 }
 2814                                 nrt = (rectype_t)now;
 2815                                 if (nrt == REC_NONE)
 2816                                         nrt = (rectype_t)(~mode & REC_FULL);
 2817                                 if (schanged++ == 0) {
 2818                                         crt = nrt;
 2819                                 } else if (crt != nrt)
 2820                                         continue;
 2821                                 if (!m_append(m, sizeof(struct in6_addr),
 2822                                     (void *)&ims->im6s_addr)) {
 2823                                         if (m != m0)
 2824                                                 m_freem(m);
 2825                                         CTR1(KTR_MLD,
 2826                                             "%s: m_append() failed", __func__);
 2827                                         return (-ENOMEM);
 2828                                 }
 2829 #ifdef KTR
 2830                                 nallow += !!(crt == REC_ALLOW);
 2831                                 nblock += !!(crt == REC_BLOCK);
 2832 #endif
 2833                                 if (++rsrcs == m0srcs)
 2834                                         break;
 2835                         }
 2836                         /*
 2837                          * If we did not append any tree nodes on this
 2838                          * pass, back out of allocations.
 2839                          */
 2840                         if (rsrcs == 0) {
 2841                                 npbytes -= sizeof(struct mldv2_record);
 2842                                 if (m != m0) {
 2843                                         CTR1(KTR_MLD,
 2844                                             "%s: m_free(m)", __func__);
 2845                                         m_freem(m);
 2846                                 } else {
 2847                                         CTR1(KTR_MLD,
 2848                                             "%s: m_adj(m, -mr)", __func__);
 2849                                         m_adj(m, -((int)sizeof(
 2850                                             struct mldv2_record)));
 2851                                 }
 2852                                 continue;
 2853                         }
 2854                         npbytes += (rsrcs * sizeof(struct in6_addr));
 2855                         if (crt == REC_ALLOW)
 2856                                 pmr->mr_type = MLD_ALLOW_NEW_SOURCES;
 2857                         else if (crt == REC_BLOCK)
 2858                                 pmr->mr_type = MLD_BLOCK_OLD_SOURCES;
 2859                         pmr->mr_numsrc = htons(rsrcs);
 2860                         /*
 2861                          * Count the new group record, and enqueue this
 2862                          * packet if it wasn't already queued.
 2863                          */
 2864                         m->m_pkthdr.vt_nrecs++;
 2865                         if (m != m0)
 2866                                 mbufq_enqueue(mq, m);
 2867                         nbytes += npbytes;
 2868                 } while (nims != NULL);
 2869                 drt |= crt;
 2870                 crt = (~crt & REC_FULL);
 2871         }
 2872 
 2873         CTR3(KTR_MLD, "%s: queued %d ALLOW_NEW, %d BLOCK_OLD", __func__,
 2874             nallow, nblock);
 2875 
 2876         return (nbytes);
 2877 }
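      /*
       * Illustrative worked example, not part of the original mld6.c, of the
       * "inverse of the group's filter mode" rule applied above when a source
       * is MCAST_UNDEFINED at t1 (i.e. it was dropped from the filter set):
       *
       *	group mode MCAST_INCLUDE: nrt = ~REC_ALLOW & REC_FULL = REC_BLOCK,
       *	    so sources removed from an include list are reported in
       *	    BLOCK_OLD_SOURCES records;
       *	group mode MCAST_EXCLUDE: nrt = ~REC_BLOCK & REC_FULL = REC_ALLOW,
       *	    so sources removed from an exclude list are reported in
       *	    ALLOW_NEW_SOURCES records;
       *
       * assuming the MCAST_*-to-rectype_t correspondence noted at the
       * rectype_t definition.
       */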
 2878 
 2879 static int
 2880 mld_v2_merge_state_changes(struct in6_multi *inm, struct mbufq *scq)
 2881 {
 2882         struct mbufq    *gq;
 2883         struct mbuf     *m;             /* pending state-change */
 2884         struct mbuf     *m0;            /* copy of pending state-change */
 2885         struct mbuf     *mt;            /* last state-change in packet */
 2886         int              docopy, domerge;
 2887         u_int            recslen;
 2888 
 2889         docopy = 0;
 2890         domerge = 0;
 2891         recslen = 0;
 2892 
 2893         IN6_MULTI_LIST_LOCK_ASSERT();
 2894         MLD_LOCK_ASSERT();
 2895 
 2896         /*
 2897          * If there are further pending retransmissions, make a writable
 2898          * copy of each queued state-change message before merging.
 2899          */
 2900         if (inm->in6m_scrv > 0)
 2901                 docopy = 1;
 2902 
 2903         gq = &inm->in6m_scq;
 2904 #ifdef KTR
 2905         if (mbufq_first(gq) == NULL) {
 2906                 CTR2(KTR_MLD, "%s: WARNING: queue for inm %p is empty",
 2907                     __func__, inm);
 2908         }
 2909 #endif
 2910 
 2911         m = mbufq_first(gq);
 2912         while (m != NULL) {
 2913                 /*
 2914                  * Only merge the report into the current packet if
 2915                  * there is sufficient space to do so; an MLDv2 report
 2916                  * packet may only contain 65,535 group records.
 2917                  * Always use a simple mbuf chain concatenation to do this,
 2918                  * as large state changes for single groups may have
 2919                  * allocated clusters.
 2920                  */
 2921                 domerge = 0;
 2922                 mt = mbufq_last(scq);
 2923                 if (mt != NULL) {
 2924                         recslen = m_length(m, NULL);
 2925 
 2926                         if ((mt->m_pkthdr.vt_nrecs +
 2927                             m->m_pkthdr.vt_nrecs <=
 2928                             MLD_V2_REPORT_MAXRECS) &&
 2929                             (mt->m_pkthdr.len + recslen <=
 2930                             (inm->in6m_ifp->if_mtu - MLD_MTUSPACE)))
 2931                                 domerge = 1;
 2932                 }
 2933 
 2934                 if (!domerge && mbufq_full(gq)) {
 2935                         CTR2(KTR_MLD,
 2936                             "%s: outbound queue full, skipping whole packet %p",
 2937                             __func__, m);
 2938                         mt = m->m_nextpkt;
 2939                         if (!docopy)
 2940                                 m_freem(m);
 2941                         m = mt;
 2942                         continue;
 2943                 }
 2944 
 2945                 if (!docopy) {
 2946                         CTR2(KTR_MLD, "%s: dequeueing %p", __func__, m);
 2947                         m0 = mbufq_dequeue(gq);
 2948                         m = m0->m_nextpkt;
 2949                 } else {
 2950                         CTR2(KTR_MLD, "%s: copying %p", __func__, m);
 2951                         m0 = m_dup(m, M_NOWAIT);
 2952                         if (m0 == NULL)
 2953                                 return (ENOMEM);
 2954                         m0->m_nextpkt = NULL;
 2955                         m = m->m_nextpkt;
 2956                 }
 2957 
 2958                 if (!domerge) {
 2959                         CTR3(KTR_MLD, "%s: queueing %p to scq %p",
 2960                             __func__, m0, scq);
 2961                         mbufq_enqueue(scq, m0);
 2962                 } else {
 2963                         struct mbuf *mtl;       /* last mbuf of packet mt */
 2964 
 2965                         CTR3(KTR_MLD, "%s: merging %p with ifscq tail %p",
 2966                             __func__, m0, mt);
 2967 
 2968                         mtl = m_last(mt);
 2969                         m0->m_flags &= ~M_PKTHDR;
 2970                         mt->m_pkthdr.len += recslen;
 2971                         mt->m_pkthdr.vt_nrecs +=
 2972                             m0->m_pkthdr.vt_nrecs;
 2973 
 2974                         mtl->m_next = m0;
 2975                 }
 2976         }
 2977 
 2978         return (0);
 2979 }
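      /*
       * Illustrative sketch, not part of the original mld6.c, of the merge
       * test used above: a pending state-change m is folded into the tail
       * packet mt of scq only while both limits still hold, roughly
       *
       *	(mt->m_pkthdr.vt_nrecs + m->m_pkthdr.vt_nrecs <=
       *	    MLD_V2_REPORT_MAXRECS) &&
       *	(mt->m_pkthdr.len + m_length(m, NULL) <=
       *	    inm->in6m_ifp->if_mtu - MLD_MTUSPACE)
       *
       * otherwise the message becomes a packet of its own (or is skipped
       * outright when the queue is already full).
       */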
 2980 
 2981 /*
 2982  * Respond to a pending MLDv2 General Query.
 2983  */
 2984 static void
 2985 mld_v2_dispatch_general_query(struct mld_ifsoftc *mli)
 2986 {
 2987         struct ifmultiaddr      *ifma;
 2988         struct ifnet            *ifp;
 2989         struct in6_multi        *inm;
 2990         int                      retval __unused;
 2991 
 2992         NET_EPOCH_ASSERT();
 2993         IN6_MULTI_LIST_LOCK_ASSERT();
 2994         MLD_LOCK_ASSERT();
 2995 
 2996         KASSERT(mli->mli_version == MLD_VERSION_2,
 2997             ("%s: called when version %d", __func__, mli->mli_version));
 2998 
 2999         /*
 3000          * Check that there are some packets queued. If so, send them first.
 3001          * For a large number of groups the reply to a general query can take
 3002          * many packets; we should finish sending them before starting to
 3003          * queue the new reply.
 3004          */
 3005         if (mbufq_len(&mli->mli_gq) != 0)
 3006                 goto send;
 3007 
 3008         ifp = mli->mli_ifp;
 3009 
 3010         CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 3011                 inm = in6m_ifmultiaddr_get_inm(ifma);
 3012                 if (inm == NULL)
 3013                         continue;
 3014                 KASSERT(ifp == inm->in6m_ifp,
 3015                     ("%s: inconsistent ifp", __func__));
 3016 
 3017                 switch (inm->in6m_state) {
 3018                 case MLD_NOT_MEMBER:
 3019                 case MLD_SILENT_MEMBER:
 3020                         break;
 3021                 case MLD_REPORTING_MEMBER:
 3022                 case MLD_IDLE_MEMBER:
 3023                 case MLD_LAZY_MEMBER:
 3024                 case MLD_SLEEPING_MEMBER:
 3025                 case MLD_AWAKENING_MEMBER:
 3026                         inm->in6m_state = MLD_REPORTING_MEMBER;
 3027                         retval = mld_v2_enqueue_group_record(&mli->mli_gq,
 3028                             inm, 0, 0, 0, 0);
 3029                         CTR2(KTR_MLD, "%s: enqueue record = %d",
 3030                             __func__, retval);
 3031                         break;
 3032                 case MLD_G_QUERY_PENDING_MEMBER:
 3033                 case MLD_SG_QUERY_PENDING_MEMBER:
 3034                 case MLD_LEAVING_MEMBER:
 3035                         break;
 3036                 }
 3037         }
 3038 
 3039 send:
 3040         mld_dispatch_queue(&mli->mli_gq, MLD_MAX_RESPONSE_BURST);
 3041 
 3042         /*
 3043          * Slew transmission of bursts over 500ms intervals.
 3044          */
 3045         if (mbufq_first(&mli->mli_gq) != NULL) {
 3046                 mli->mli_v2_timer = 1 + MLD_RANDOM_DELAY(
 3047                     MLD_RESPONSE_BURST_INTERVAL);
 3048                 V_interface_timers_running6 = 1;
 3049         }
 3050 }
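      /*
       * Illustrative note, not part of the original mld6.c: the dispatcher
       * above sends at most MLD_MAX_RESPONSE_BURST packets per call; if
       * anything remains queued, the interface timer is rearmed with a small
       * random offset,
       *
       *	mli->mli_v2_timer = 1 +
       *	    MLD_RANDOM_DELAY(MLD_RESPONSE_BURST_INTERVAL);
       *
       * so a large general-query reply is paced out over several timer ticks
       * instead of being transmitted as one burst.
       */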
 3051 
 3052 /*
 3053  * Transmit the next pending message in the output queue.
 3054  *
 3055  * VIMAGE: Needs to store/restore vnet pointer on a per-mbuf-chain basis.
 3056  * MRT: Nothing needs to be done, as MLD traffic is always local to
 3057  * a link and uses a link-scope multicast address.
 3058  */
 3059 static void
 3060 mld_dispatch_packet(struct mbuf *m)
 3061 {
 3062         struct ip6_moptions      im6o;
 3063         struct ifnet            *ifp;
 3064         struct ifnet            *oifp;
 3065         struct mbuf             *m0;
 3066         struct mbuf             *md;
 3067         struct ip6_hdr          *ip6;
 3068         struct mld_hdr          *mld;
 3069         int                      error;
 3070         int                      off;
 3071         int                      type;
 3072         uint32_t                 ifindex;
 3073 
 3074         CTR2(KTR_MLD, "%s: transmit %p", __func__, m);
 3075         NET_EPOCH_ASSERT();
 3076 
 3077         /*
 3078          * Set VNET image pointer from enqueued mbuf chain
 3079          * before doing anything else. Whilst we use interface
 3080          * indexes to guard against interface detach, they are
 3081          * unique to each VIMAGE and must be retrieved.
 3082          */
 3083         ifindex = mld_restore_context(m);
 3084 
 3085         /*
 3086          * Check if the ifnet still exists. This limits the scope of
 3087          * any race in the absence of a global ifp lock for low cost
 3088          * (an array lookup).
 3089          */
 3090         ifp = ifnet_byindex(ifindex);
 3091         if (ifp == NULL) {
 3092                 CTR3(KTR_MLD, "%s: dropped %p as ifindex %u went away.",
 3093                     __func__, m, ifindex);
 3094                 m_freem(m);
 3095                 IP6STAT_INC(ip6s_noroute);
 3096                 goto out;
 3097         }
 3098 
 3099         im6o.im6o_multicast_hlim  = 1;
 3100         im6o.im6o_multicast_loop = (V_ip6_mrouter != NULL);
 3101         im6o.im6o_multicast_ifp = ifp;
 3102 
 3103         if (m->m_flags & M_MLDV1) {
 3104                 m0 = m;
 3105         } else {
 3106                 m0 = mld_v2_encap_report(ifp, m);
 3107                 if (m0 == NULL) {
 3108                         CTR2(KTR_MLD, "%s: dropped %p", __func__, m);
 3109                         IP6STAT_INC(ip6s_odropped);
 3110                         goto out;
 3111                 }
 3112         }
 3113 
 3114         mld_scrub_context(m0);
 3115         m_clrprotoflags(m);
 3116         m0->m_pkthdr.rcvif = V_loif;
 3117 
 3118         ip6 = mtod(m0, struct ip6_hdr *);
 3119 #if 0
 3120         (void)in6_setscope(&ip6->ip6_dst, ifp, NULL);   /* XXX LOR */
 3121 #else
 3122         /*
 3123          * XXX XXX Break some KPI rules to prevent an LOR which would
 3124          * occur if we called in6_setscope() at transmission.
 3125          * See comments at top of file.
 3126          */
 3127         MLD_EMBEDSCOPE(&ip6->ip6_dst, ifp->if_index);
 3128 #endif
 3129 
 3130         /*
 3131          * Retrieve the ICMPv6 type before handoff to ip6_output(),
 3132          * so we can bump the stats.
 3133          */
 3134         md = m_getptr(m0, sizeof(struct ip6_hdr), &off);
 3135         mld = (struct mld_hdr *)(mtod(md, uint8_t *) + off);
 3136         type = mld->mld_type;
 3137 
 3138         oifp = NULL;
 3139         error = ip6_output(m0, &mld_po, NULL, IPV6_UNSPECSRC, &im6o,
 3140             &oifp, NULL);
 3141         if (error) {
 3142                 CTR3(KTR_MLD, "%s: ip6_output(%p) = %d", __func__, m0, error);
 3143                 goto out;
 3144         }
 3145         ICMP6STAT_INC(icp6s_outhist[type]);
 3146         if (oifp != NULL) {
 3147                 icmp6_ifstat_inc(oifp, ifs6_out_msg);
 3148                 switch (type) {
 3149                 case MLD_LISTENER_REPORT:
 3150                 case MLDV2_LISTENER_REPORT:
 3151                         icmp6_ifstat_inc(oifp, ifs6_out_mldreport);
 3152                         break;
 3153                 case MLD_LISTENER_DONE:
 3154                         icmp6_ifstat_inc(oifp, ifs6_out_mlddone);
 3155                         break;
 3156                 }
 3157         }
 3158 out:
 3159         return;
 3160 }
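      /*
       * Illustrative note, not part of the original mld6.c: the transmit path
       * above depends on mld_save_context() having stashed the outgoing
       * interface index in the mbuf packet header at enqueue time; it is
       * recovered and revalidated here before anything else, roughly
       *
       *	ifindex = mld_restore_context(m);
       *	ifp = ifnet_byindex(ifindex);
       *	if (ifp == NULL)
       *		m_freem(m);	(drop: the interface went away)
       *
       * which bounds the interface-detach race to a single array lookup
       * without taking a global ifnet lock.
       */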
 3161 
 3162 /*
 3163  * Encapsulate an MLDv2 report.
 3164  *
 3165  * KAME IPv6 requires that hop-by-hop options be passed separately,
 3166  * and that the IPv6 header be prepended in a separate mbuf.
 3167  *
 3168  * Returns a pointer to the new mbuf chain head, or NULL if the
 3169  * allocation failed.
 3170  */
 3171 static struct mbuf *
 3172 mld_v2_encap_report(struct ifnet *ifp, struct mbuf *m)
 3173 {
 3174         struct mbuf             *mh;
 3175         struct mldv2_report     *mld;
 3176         struct ip6_hdr          *ip6;
 3177         struct in6_ifaddr       *ia;
 3178         int                      mldreclen;
 3179 
 3180         KASSERT(ifp != NULL, ("%s: null ifp", __func__));
 3181         KASSERT((m->m_flags & M_PKTHDR),
 3182             ("%s: mbuf chain %p is !M_PKTHDR", __func__, m));
 3183 
 3184         /*
 3185          * RFC3590: OK to send as :: or tentative during DAD.
 3186          */
 3187         NET_EPOCH_ASSERT();
 3188         ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
 3189         if (ia == NULL)
 3190                 CTR1(KTR_MLD, "%s: warning: ia is NULL", __func__);
 3191 
 3192         mh = m_gethdr(M_NOWAIT, MT_DATA);
 3193         if (mh == NULL) {
 3194                 if (ia != NULL)
 3195                         ifa_free(&ia->ia_ifa);
 3196                 m_freem(m);
 3197                 return (NULL);
 3198         }
 3199         M_ALIGN(mh, sizeof(struct ip6_hdr) + sizeof(struct mldv2_report));
 3200 
 3201         mldreclen = m_length(m, NULL);
 3202         CTR2(KTR_MLD, "%s: mldreclen is %d", __func__, mldreclen);
 3203 
 3204         mh->m_len = sizeof(struct ip6_hdr) + sizeof(struct mldv2_report);
 3205         mh->m_pkthdr.len = sizeof(struct ip6_hdr) +
 3206             sizeof(struct mldv2_report) + mldreclen;
 3207 
 3208         ip6 = mtod(mh, struct ip6_hdr *);
 3209         ip6->ip6_flow = 0;
 3210         ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
 3211         ip6->ip6_vfc |= IPV6_VERSION;
 3212         ip6->ip6_nxt = IPPROTO_ICMPV6;
 3213         ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
 3214         if (ia != NULL)
 3215                 ifa_free(&ia->ia_ifa);
 3216         ip6->ip6_dst = in6addr_linklocal_allv2routers;
 3217         /* scope ID will be set in netisr */
 3218 
 3219         mld = (struct mldv2_report *)(ip6 + 1);
 3220         mld->mld_type = MLDV2_LISTENER_REPORT;
 3221         mld->mld_code = 0;
 3222         mld->mld_cksum = 0;
 3223         mld->mld_v2_reserved = 0;
 3224         mld->mld_v2_numrecs = htons(m->m_pkthdr.vt_nrecs);
 3225         m->m_pkthdr.vt_nrecs = 0;
 3226 
 3227         mh->m_next = m;
 3228         mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6,
 3229             sizeof(struct ip6_hdr), sizeof(struct mldv2_report) + mldreclen);
 3230         return (mh);
 3231 }
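      /*
       * Illustrative sketch, not part of the original mld6.c, of the chain
       * built above: a new header mbuf is prepended to the record mbuf(s),
       * and the ICMPv6 checksum covers only the report and its records:
       *
       *	mh: [ ip6_hdr | mldv2_report ] -> m: [ group records ... ]
       *
       * mld_v2_numrecs is taken from m->m_pkthdr.vt_nrecs before that counter
       * is cleared, while the hop-by-hop Router Alert option is supplied
       * separately through the mld_po packet options at ip6_output() time.
       */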
 3232 
 3233 #ifdef KTR
 3234 static char *
 3235 mld_rec_type_to_str(const int type)
 3236 {
 3237 
 3238         switch (type) {
 3239                 case MLD_CHANGE_TO_EXCLUDE_MODE:
 3240                         return "TO_EX";
 3241                         break;
 3242                 case MLD_CHANGE_TO_INCLUDE_MODE:
 3243                         return "TO_IN";
 3244                         break;
 3245                 case MLD_MODE_IS_EXCLUDE:
 3246                         return "MODE_EX";
 3247                         break;
 3248                 case MLD_MODE_IS_INCLUDE:
 3249                         return "MODE_IN";
 3250                         break;
 3251                 case MLD_ALLOW_NEW_SOURCES:
 3252                         return "ALLOW_NEW";
 3253                         break;
 3254                 case MLD_BLOCK_OLD_SOURCES:
 3255                         return "BLOCK_OLD";
 3256                         break;
 3257                 default:
 3258                         break;
 3259         }
 3260         return "unknown";
 3261 }
 3262 #endif
 3263 
 3264 static void
 3265 mld_init(void *unused __unused)
 3266 {
 3267 
 3268         CTR1(KTR_MLD, "%s: initializing", __func__);
 3269         MLD_LOCK_INIT();
 3270 
 3271         ip6_initpktopts(&mld_po);
 3272         mld_po.ip6po_hlim = 1;
 3273         mld_po.ip6po_hbh = &mld_ra.hbh;
 3274         mld_po.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER;
 3275         mld_po.ip6po_flags = IP6PO_DONTFRAG;
 3276 
 3277         callout_init(&mldslow_callout, 1);
 3278         callout_reset(&mldslow_callout, hz / MLD_SLOWHZ, mld_slowtimo, NULL);
 3279         callout_init(&mldfast_callout, 1);
 3280         callout_reset(&mldfast_callout, hz / MLD_FASTHZ, mld_fasttimo, NULL);
 3281 }
 3282 SYSINIT(mld_init, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE, mld_init, NULL);
 3283 
 3284 static void
 3285 mld_uninit(void *unused __unused)
 3286 {
 3287 
 3288         CTR1(KTR_MLD, "%s: tearing down", __func__);
 3289         callout_drain(&mldslow_callout);
 3290         callout_drain(&mldfast_callout);
 3291         MLD_LOCK_DESTROY();
 3292 }
 3293 SYSUNINIT(mld_uninit, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE, mld_uninit, NULL);
 3294 
 3295 static void
 3296 vnet_mld_init(const void *unused __unused)
 3297 {
 3298 
 3299         CTR1(KTR_MLD, "%s: initializing", __func__);
 3300 
 3301         LIST_INIT(&V_mli_head);
 3302 }
 3303 VNET_SYSINIT(vnet_mld_init, SI_SUB_PROTO_MC, SI_ORDER_ANY, vnet_mld_init,
 3304     NULL);
 3305 
 3306 static void
 3307 vnet_mld_uninit(const void *unused __unused)
 3308 {
 3309 
 3310         /* This can happen if we shutdown the network stack. */
 3311         CTR1(KTR_MLD, "%s: tearing down", __func__);
 3312 }
 3313 VNET_SYSUNINIT(vnet_mld_uninit, SI_SUB_PROTO_MC, SI_ORDER_ANY, vnet_mld_uninit,
 3314     NULL);
 3315 
 3316 static int
 3317 mld_modevent(module_t mod, int type, void *unused __unused)
 3318 {
 3319 
 3320         switch (type) {
 3321         case MOD_LOAD:
 3322         case MOD_UNLOAD:
 3323                 break;
 3324         default:
 3325                 return (EOPNOTSUPP);
 3326         }
 3327         return (0);
 3328 }
 3329 
 3330 static moduledata_t mld_mod = {
 3331         "mld",
 3332         mld_modevent,
 3333         0
 3334 };
 3335 DECLARE_MODULE(mld, mld_mod, SI_SUB_PROTO_MC, SI_ORDER_ANY);
