The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/igmp.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 2007-2009 Bruce Simpson.
    5  * Copyright (c) 1988 Stephen Deering.
    6  * Copyright (c) 1992, 1993
    7  *      The Regents of the University of California.  All rights reserved.
    8  *
    9  * This code is derived from software contributed to Berkeley by
   10  * Stephen Deering of Stanford University.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  * 3. Neither the name of the University nor the names of its contributors
   21  *    may be used to endorse or promote products derived from this software
   22  *    without specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  *
   36  *      @(#)igmp.c      8.1 (Berkeley) 7/19/93
   37  */
   38 
   39 /*
   40  * Internet Group Management Protocol (IGMP) routines.
   41  * [RFC1112, RFC2236, RFC3376]
   42  *
   43  * Written by Steve Deering, Stanford, May 1988.
   44  * Modified by Rosen Sharma, Stanford, Aug 1994.
   45  * Modified by Bill Fenner, Xerox PARC, Feb 1995.
   46  * Modified to fully comply to IGMPv2 by Bill Fenner, Oct 1995.
   47  * Significantly rewritten for IGMPv3, VIMAGE, and SMP by Bruce Simpson.
   48  *
   49  * MULTICAST Revision: 3.5.1.4
   50  */
   51 
   52 #include <sys/cdefs.h>
   53 __FBSDID("$FreeBSD$");
   54 
   55 #include "opt_ddb.h"
   56 
   57 #include <sys/param.h>
   58 #include <sys/systm.h>
   59 #include <sys/module.h>
   60 #include <sys/malloc.h>
   61 #include <sys/mbuf.h>
   62 #include <sys/socket.h>
   63 #include <sys/kernel.h>
   64 #include <sys/lock.h>
   65 #include <sys/sysctl.h>
   66 #include <sys/ktr.h>
   67 #include <sys/condvar.h>
   68 
   69 #ifdef DDB
   70 #include <ddb/ddb.h>
   71 #endif
   72 
   73 #include <net/if.h>
   74 #include <net/if_var.h>
   75 #include <net/netisr.h>
   76 #include <net/vnet.h>
   77 
   78 #include <netinet/in.h>
   79 #include <netinet/in_var.h>
   80 #include <netinet/in_systm.h>
   81 #include <netinet/ip.h>
   82 #include <netinet/ip_var.h>
   83 #include <netinet/ip_options.h>
   84 #include <netinet/igmp.h>
   85 #include <netinet/igmp_var.h>
   86 
   87 #include <machine/in_cksum.h>
   88 
   89 #include <security/mac/mac_framework.h>
   90 
   91 #ifndef KTR_IGMPV3
   92 #define KTR_IGMPV3 KTR_INET
   93 #endif
   94 
      /*
       * Timer rates used by this file, expressed as timeouts per second.
       */
   95 #define IGMP_SLOWHZ     2       /* 2 slow timeouts per second */
   96 #define IGMP_FASTHZ     5       /* 5 fast timeouts per second */
      /*
       * Expressed in fast-timeout ticks.  This is an integer division, so
       * with IGMP_FASTHZ defined as 5 the value is 2, not 2.5.
       */
   97 #define IGMP_RESPONSE_BURST_INTERVAL    (IGMP_FASTHZ / 2)
   98 
   99 static struct igmp_ifsoftc *
  100                 igi_alloc_locked(struct ifnet *);
  101 static void     igi_delete_locked(const struct ifnet *);
  102 static void     igmp_dispatch_queue(struct mbufq *, int, const int);
  103 static void     igmp_fasttimo_vnet(void);
  104 static void     igmp_final_leave(struct in_multi *, struct igmp_ifsoftc *);
  105 static int      igmp_handle_state_change(struct in_multi *,
  106                     struct igmp_ifsoftc *);
  107 static int      igmp_initial_join(struct in_multi *, struct igmp_ifsoftc *);
  108 static int      igmp_input_v1_query(struct ifnet *, const struct ip *,
  109                     const struct igmp *);
  110 static int      igmp_input_v2_query(struct ifnet *, const struct ip *,
  111                     const struct igmp *);
  112 static int      igmp_input_v3_query(struct ifnet *, const struct ip *,
  113                     /*const*/ struct igmpv3 *);
  114 static int      igmp_input_v3_group_query(struct in_multi *,
  115                     struct igmp_ifsoftc *, int, /*const*/ struct igmpv3 *);
  116 static int      igmp_input_v1_report(struct ifnet *, /*const*/ struct ip *,
  117                     /*const*/ struct igmp *);
  118 static int      igmp_input_v2_report(struct ifnet *, /*const*/ struct ip *,
  119                     /*const*/ struct igmp *);
  120 static void     igmp_intr(struct mbuf *);
  121 static int      igmp_isgroupreported(const struct in_addr);
  122 static struct mbuf *
  123                 igmp_ra_alloc(void);
  124 #ifdef KTR
  125 static char *   igmp_rec_type_to_str(const int);
  126 #endif
  127 static void     igmp_set_version(struct igmp_ifsoftc *, const int);
  128 static void     igmp_slowtimo_vnet(void);
  129 static int      igmp_v1v2_queue_report(struct in_multi *, const int);
  130 static void     igmp_v1v2_process_group_timer(struct in_multi *, const int);
  131 static void     igmp_v1v2_process_querier_timers(struct igmp_ifsoftc *);
  132 static void     igmp_v2_update_group(struct in_multi *, const int);
  133 static void     igmp_v3_cancel_link_timers(struct igmp_ifsoftc *);
  134 static void     igmp_v3_dispatch_general_query(struct igmp_ifsoftc *);
  135 static struct mbuf *
  136                 igmp_v3_encap_report(struct ifnet *, struct mbuf *);
  137 static int      igmp_v3_enqueue_group_record(struct mbufq *,
  138                     struct in_multi *, const int, const int, const int);
  139 static int      igmp_v3_enqueue_filter_change(struct mbufq *,
  140                     struct in_multi *);
  141 static void     igmp_v3_process_group_timers(struct in_multi_head *,
  142                     struct mbufq *, struct mbufq *, struct in_multi *,
  143                     const int);
  144 static int      igmp_v3_merge_state_changes(struct in_multi *,
  145                     struct mbufq *);
  146 static void     igmp_v3_suppress_group_record(struct in_multi *);
  147 static int      sysctl_igmp_default_version(SYSCTL_HANDLER_ARGS);
  148 static int      sysctl_igmp_gsr(SYSCTL_HANDLER_ARGS);
  149 static int      sysctl_igmp_ifinfo(SYSCTL_HANDLER_ARGS);
  150 static int      sysctl_igmp_stat(SYSCTL_HANDLER_ARGS);
  151 
      /*
       * netisr handler description for IGMP.  It names the protocol "igmp"
       * and ties NETISR_IGMP work to igmp_intr(); igmp_dispatch_queue()
       * hands packets to the netisr under that same protocol number.
       * NOTE(review): the registration call itself is not in this part of
       * the file -- confirm where igmp_nh is passed to the netisr layer.
       */
  152 static const struct netisr_handler igmp_nh = {
  153         .nh_name = "igmp",
  154         .nh_handler = igmp_intr,
  155         .nh_proto = NETISR_IGMP,
  156         .nh_policy = NETISR_POLICY_SOURCE,
  157 };
  158 
  159 /*
  160  * System-wide globals.
  161  *
  162  * Unlocked access to these is OK, except for the global IGMP output
  163  * queue. The IGMP subsystem lock ends up being system-wide for the moment,
  164  * because all VIMAGEs have to share a global output queue, as netisrs
  165  * themselves are not virtualized.
  166  *
  167  * Locking:
  168  *  * The permitted lock order is: IN_MULTI_LIST_LOCK, IGMP_LOCK, IF_ADDR_LOCK.
  169  *    Any may be taken independently; if any are held at the same
  170  *    time, the above lock order must be followed.
  171  *  * All output is delegated to the netisr.
  172  *    Now that Giant has been eliminated, the netisr may be inlined.
  173  *  * IN_MULTI_LIST_LOCK covers in_multi.
  174  *  * IGMP_LOCK covers igmp_ifsoftc and any global variables in this file,
  175  *    including the output queue.
  176  *  * IF_ADDR_LOCK covers if_multiaddrs, which is used for a variety of
  177  *    per-link state iterators.
  178  *  * igmp_ifsoftc is valid as long as PF_INET is attached to the interface,
  179  *    therefore it is not refcounted.
  180  *    We allow unlocked reads of igmp_ifsoftc when accessed via in_multi.
  181  *
  182  * Reference counting
  183  *  * IGMP acquires its own reference every time an in_multi is passed to
  184  *    it and the group is being joined for the first time.
  185  *  * IGMP releases its reference(s) on in_multi in a deferred way,
  186  *    because the operations which process the release run as part of
  187  *    a loop whose control variables are directly affected by the release
  188  *    (that, and not recursing on the IF_ADDR_LOCK).
  189  *
  190  * VIMAGE: Each in_multi corresponds to an ifp, and each ifp corresponds
  191  * to a vnet in ifp->if_vnet.
  192  *
  193  * SMPng: XXX We may potentially race operations on ifma_protospec.
  194  * The problem is that we currently lack a clean way of taking the
  195  * IF_ADDR_LOCK() between the ifnet and in layers w/o recursing,
  196  * as anything which modifies ifma needs to be covered by that lock.
  197  * So check for ifma_protospec being NULL before proceeding.
  198  */
  199 struct mtx               igmp_mtx;
  200 
  201 struct mbuf             *m_raopt;                /* Router Alert option */
  202 static MALLOC_DEFINE(M_IGMP, "igmp", "igmp state");
  203 
  204 /*
  205  * VIMAGE-wide globals.
  206  *
  207  * The IGMPv3 timers themselves need to run per-image, however, for
  208  * historical reasons, timers run globally.  This needs to be improved.
  209  * An ifnet can only be in one vimage at a time, and the loopback
  210  * ifnet, loif, is itself virtualized.
  211  * It would otherwise be possible to seriously hose IGMP state,
  212  * and create inconsistencies in upstream multicast routing, if you have
  213  * multiple VIMAGEs running on the same link joining different multicast
  214  * groups, UNLESS the "primary IP address" is different. This is because
  215  * IGMP for IPv4 does not force link-local addresses to be used for each
  216  * node, unlike MLD for IPv6.
  217  * Obviously the IGMPv3 per-interface state has per-vimage granularity
  218  * also as a result.
  219  *
  220  * FUTURE: Stop using IFP_TO_IA/INADDR_ANY, and use source address selection
  221  * policy to control the address used by IGMP on the link.
  222  */
  223 VNET_DEFINE_STATIC(int, interface_timers_running);      /* IGMPv3 general
  224                                                          * query response */
  225 VNET_DEFINE_STATIC(int, state_change_timers_running);   /* IGMPv3 state-change
  226                                                          * retransmit */
  227 VNET_DEFINE_STATIC(int, current_state_timers_running);  /* IGMPv1/v2 host
  228                                                          * report; IGMPv3 g/sg
  229                                                          * query response */
  230 
  231 #define V_interface_timers_running      VNET(interface_timers_running)
  232 #define V_state_change_timers_running   VNET(state_change_timers_running)
  233 #define V_current_state_timers_running  VNET(current_state_timers_running)
  234 
  235 VNET_PCPUSTAT_DEFINE(struct igmpstat, igmpstat);
  236 VNET_PCPUSTAT_SYSINIT(igmpstat);
  237 VNET_PCPUSTAT_SYSUNINIT(igmpstat);
  238 
  239 VNET_DEFINE_STATIC(LIST_HEAD(, igmp_ifsoftc), igi_head) =
  240     LIST_HEAD_INITIALIZER(igi_head);
  241 VNET_DEFINE_STATIC(struct timeval, igmp_gsrdelay) = {10, 0};
  242 
  243 #define V_igi_head                      VNET(igi_head)
  244 #define V_igmp_gsrdelay                 VNET(igmp_gsrdelay)
  245 
  246 VNET_DEFINE_STATIC(int, igmp_recvifkludge) = 1;
  247 VNET_DEFINE_STATIC(int, igmp_sendra) = 1;
  248 VNET_DEFINE_STATIC(int, igmp_sendlocal) = 1;
  249 VNET_DEFINE_STATIC(int, igmp_v1enable) = 1;
  250 VNET_DEFINE_STATIC(int, igmp_v2enable) = 1;
  251 VNET_DEFINE_STATIC(int, igmp_legacysupp);
  252 VNET_DEFINE_STATIC(int, igmp_default_version) = IGMP_VERSION_3;
  253 
  254 #define V_igmp_recvifkludge             VNET(igmp_recvifkludge)
  255 #define V_igmp_sendra                   VNET(igmp_sendra)
  256 #define V_igmp_sendlocal                VNET(igmp_sendlocal)
  257 #define V_igmp_v1enable                 VNET(igmp_v1enable)
  258 #define V_igmp_v2enable                 VNET(igmp_v2enable)
  259 #define V_igmp_legacysupp               VNET(igmp_legacysupp)
  260 #define V_igmp_default_version          VNET(igmp_default_version)
  261 
  262 /*
  263  * Virtualized sysctls.
  264  */
  265 SYSCTL_PROC(_net_inet_igmp, IGMPCTL_STATS, stats,
  266     CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_MPSAFE,
  267     &VNET_NAME(igmpstat), 0, sysctl_igmp_stat, "S,igmpstat",
  268     "IGMP statistics (struct igmpstat, netinet/igmp_var.h)");
  269 SYSCTL_INT(_net_inet_igmp, OID_AUTO, recvifkludge, CTLFLAG_VNET | CTLFLAG_RW,
  270     &VNET_NAME(igmp_recvifkludge), 0,
  271     "Rewrite IGMPv1/v2 reports from 0.0.0.0 to contain subnet address");
  272 SYSCTL_INT(_net_inet_igmp, OID_AUTO, sendra, CTLFLAG_VNET | CTLFLAG_RW,
  273     &VNET_NAME(igmp_sendra), 0,
  274     "Send IP Router Alert option in IGMPv2/v3 messages");
  275 SYSCTL_INT(_net_inet_igmp, OID_AUTO, sendlocal, CTLFLAG_VNET | CTLFLAG_RW,
  276     &VNET_NAME(igmp_sendlocal), 0,
  277     "Send IGMP membership reports for 224.0.0.0/24 groups");
  278 SYSCTL_INT(_net_inet_igmp, OID_AUTO, v1enable, CTLFLAG_VNET | CTLFLAG_RW,
  279     &VNET_NAME(igmp_v1enable), 0,
  280     "Enable backwards compatibility with IGMPv1");
  281 SYSCTL_INT(_net_inet_igmp, OID_AUTO, v2enable, CTLFLAG_VNET | CTLFLAG_RW,
  282     &VNET_NAME(igmp_v2enable), 0,
  283     "Enable backwards compatibility with IGMPv2");
  284 SYSCTL_INT(_net_inet_igmp, OID_AUTO, legacysupp, CTLFLAG_VNET | CTLFLAG_RW,
  285     &VNET_NAME(igmp_legacysupp), 0,
  286     "Allow v1/v2 reports to suppress v3 group responses");
  287 SYSCTL_PROC(_net_inet_igmp, OID_AUTO, default_version,
  288     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
  289     &VNET_NAME(igmp_default_version), 0, sysctl_igmp_default_version, "I",
  290     "Default version of IGMP to run on each interface");
  291 SYSCTL_PROC(_net_inet_igmp, OID_AUTO, gsrdelay,
  292     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
  293     &VNET_NAME(igmp_gsrdelay.tv_sec), 0, sysctl_igmp_gsr, "I",
  294     "Rate limit for IGMPv3 Group-and-Source queries in seconds");
  295 
  296 /*
  297  * Non-virtualized sysctls.
  298  */
  299 static SYSCTL_NODE(_net_inet_igmp, OID_AUTO, ifinfo,
  300     CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_igmp_ifinfo,
  301     "Per-interface IGMPv3 state");
  302 
  303 static __inline void
  304 igmp_save_context(struct mbuf *m, struct ifnet *ifp)
  305 {
  306 
  307 #ifdef VIMAGE
  308         m->m_pkthdr.PH_loc.ptr = ifp->if_vnet;
  309 #endif /* VIMAGE */
  310         m->m_pkthdr.rcvif = ifp;
  311         m->m_pkthdr.flowid = ifp->if_index;
  312 }
  313 
  314 static __inline void
  315 igmp_scrub_context(struct mbuf *m)
  316 {
  317 
  318         m->m_pkthdr.PH_loc.ptr = NULL;
  319         m->m_pkthdr.flowid = 0;
  320 }
  321 
  322 /*
  323  * Restore context from a queued IGMP output chain.
  324  * Return saved ifindex.
  325  *
  326  * VIMAGE: The assertion is there to make sure that we
  327  * actually called CURVNET_SET() with what's in the mbuf chain.
  328  */
  329 static __inline uint32_t
  330 igmp_restore_context(struct mbuf *m)
  331 {
  332 
  333 #ifdef notyet
  334 #if defined(VIMAGE) && defined(INVARIANTS)
  335         KASSERT(curvnet == (m->m_pkthdr.PH_loc.ptr),
  336             ("%s: called when curvnet was not restored", __func__));
  337 #endif
  338 #endif
  339         return (m->m_pkthdr.flowid);
  340 }
  341 
  342 /*
  343  * IGMP statistics.
  344  */
  345 static int
  346 sysctl_igmp_stat(SYSCTL_HANDLER_ARGS)
  347 {
  348         struct igmpstat igps0;
  349         int error;
  350         char *p;
  351 
  352         error = sysctl_wire_old_buffer(req, sizeof(struct igmpstat));
  353         if (error)
  354                 return (error);
  355 
  356         if (req->oldptr != NULL) {
  357                 if (req->oldlen < sizeof(struct igmpstat))
  358                         error = ENOMEM;
  359                 else {
  360                         /*
  361                          * Copy the counters, and explicitly set the struct's
  362                          * version and length fields.
  363                          */
  364                         COUNTER_ARRAY_COPY(VNET(igmpstat), &igps0,
  365                             sizeof(struct igmpstat) / sizeof(uint64_t));
  366                         igps0.igps_version = IGPS_VERSION_3;
  367                         igps0.igps_len = IGPS_VERSION3_LEN;
  368                         error = SYSCTL_OUT(req, &igps0,
  369                             sizeof(struct igmpstat));
  370                 }
  371         } else
  372                 req->validlen = sizeof(struct igmpstat);
  373         if (error)
  374                 goto out;
  375         if (req->newptr != NULL) {
  376                 if (req->newlen < sizeof(struct igmpstat))
  377                         error = ENOMEM;
  378                 else
  379                         error = SYSCTL_IN(req, &igps0,
  380                             sizeof(igps0));
  381                 if (error)
  382                         goto out;
  383                 /*
  384                  * igps0 must be "all zero".
  385                  */
  386                 p = (char *)&igps0;
  387                 while (p < (char *)&igps0 + sizeof(igps0) && *p == '\0')
  388                         p++;
  389                 if (p != (char *)&igps0 + sizeof(igps0)) {
  390                         error = EINVAL;
  391                         goto out;
  392                 }
  393                 COUNTER_ARRAY_ZERO(VNET(igmpstat),
  394                     sizeof(struct igmpstat) / sizeof(uint64_t));
  395         }
  396 out:
  397         return (error);
  398 }
  399 
  400 /*
  401  * Retrieve or set default IGMP version.
  402  *
  403  * VIMAGE: Assume curvnet set by caller.
  404  * SMPng: NOTE: Serialized by IGMP lock.
  405  */
  406 static int
  407 sysctl_igmp_default_version(SYSCTL_HANDLER_ARGS)
  408 {
  409         int      error;
  410         int      new;
  411 
  412         error = sysctl_wire_old_buffer(req, sizeof(int));
  413         if (error)
  414                 return (error);
  415 
  416         IGMP_LOCK();
  417 
  418         new = V_igmp_default_version;
  419 
  420         error = sysctl_handle_int(oidp, &new, 0, req);
  421         if (error || !req->newptr)
  422                 goto out_locked;
  423 
  424         if (new < IGMP_VERSION_1 || new > IGMP_VERSION_3) {
  425                 error = EINVAL;
  426                 goto out_locked;
  427         }
  428 
  429         CTR2(KTR_IGMPV3, "change igmp_default_version from %d to %d",
  430              V_igmp_default_version, new);
  431 
  432         V_igmp_default_version = new;
  433 
  434 out_locked:
  435         IGMP_UNLOCK();
  436         return (error);
  437 }
  438 
  439 /*
  440  * Retrieve or set threshold between group-source queries in seconds.
  441  *
  442  * VIMAGE: Assume curvnet set by caller.
  443  * SMPng: NOTE: Serialized by IGMP lock.
  444  */
  445 static int
  446 sysctl_igmp_gsr(SYSCTL_HANDLER_ARGS)
  447 {
  448         int error;
  449         int i;
  450 
  451         error = sysctl_wire_old_buffer(req, sizeof(int));
  452         if (error)
  453                 return (error);
  454 
  455         IGMP_LOCK();
  456 
  457         i = V_igmp_gsrdelay.tv_sec;
  458 
  459         error = sysctl_handle_int(oidp, &i, 0, req);
  460         if (error || !req->newptr)
  461                 goto out_locked;
  462 
  463         if (i < -1 || i >= 60) {
  464                 error = EINVAL;
  465                 goto out_locked;
  466         }
  467 
  468         CTR2(KTR_IGMPV3, "change igmp_gsrdelay from %d to %d",
  469              V_igmp_gsrdelay.tv_sec, i);
  470         V_igmp_gsrdelay.tv_sec = i;
  471 
  472 out_locked:
  473         IGMP_UNLOCK();
  474         return (error);
  475 }
  476 
  477 /*
  478  * Expose struct igmp_ifsoftc to userland, keyed by ifindex.
  479  * For use by ifmcstat(8).
  480  *
  481  * SMPng: NOTE: Does an unlocked ifindex space read.
  482  * VIMAGE: Assume curvnet set by caller. The node handler itself
  483  * is not directly virtualized.
  484  */
  485 static int
  486 sysctl_igmp_ifinfo(SYSCTL_HANDLER_ARGS)
  487 {
  488         struct epoch_tracker     et;
  489         int                     *name;
  490         int                      error;
  491         u_int                    namelen;
  492         struct ifnet            *ifp;
  493         struct igmp_ifsoftc     *igi;
  494 
  495         name = (int *)arg1;
  496         namelen = arg2;
  497 
  498         if (req->newptr != NULL)
  499                 return (EPERM);
  500 
  501         if (namelen != 1)
  502                 return (EINVAL);
  503 
  504         error = sysctl_wire_old_buffer(req, sizeof(struct igmp_ifinfo));
  505         if (error)
  506                 return (error);
  507 
  508         IN_MULTI_LIST_LOCK();
  509         IGMP_LOCK();
  510 
  511         error = ENOENT;
  512 
  513         NET_EPOCH_ENTER(et);
  514         ifp = ifnet_byindex(name[0]);
  515         NET_EPOCH_EXIT(et);
  516         if (ifp == NULL)
  517                 goto out_locked;
  518 
  519         LIST_FOREACH(igi, &V_igi_head, igi_link) {
  520                 if (ifp == igi->igi_ifp) {
  521                         struct igmp_ifinfo info;
  522 
  523                         info.igi_version = igi->igi_version;
  524                         info.igi_v1_timer = igi->igi_v1_timer;
  525                         info.igi_v2_timer = igi->igi_v2_timer;
  526                         info.igi_v3_timer = igi->igi_v3_timer;
  527                         info.igi_flags = igi->igi_flags;
  528                         info.igi_rv = igi->igi_rv;
  529                         info.igi_qi = igi->igi_qi;
  530                         info.igi_qri = igi->igi_qri;
  531                         info.igi_uri = igi->igi_uri;
  532                         error = SYSCTL_OUT(req, &info, sizeof(info));
  533                         break;
  534                 }
  535         }
  536 
  537 out_locked:
  538         IGMP_UNLOCK();
  539         IN_MULTI_LIST_UNLOCK();
  540         return (error);
  541 }
  542 
  543 /*
  544  * Dispatch an entire queue of pending packet chains
  545  * using the netisr.
  546  * VIMAGE: Assumes the vnet pointer has been set.
  547  */
  548 static void
  549 igmp_dispatch_queue(struct mbufq *mq, int limit, const int loop)
  550 {
  551         struct epoch_tracker et;
  552         struct mbuf *m;
  553 
  554         NET_EPOCH_ENTER(et);
  555         while ((m = mbufq_dequeue(mq)) != NULL) {
  556                 CTR3(KTR_IGMPV3, "%s: dispatch %p from %p", __func__, mq, m);
  557                 if (loop)
  558                         m->m_flags |= M_IGMP_LOOP;
  559                 netisr_dispatch(NETISR_IGMP, m);
  560                 if (--limit == 0)
  561                         break;
  562         }
  563         NET_EPOCH_EXIT(et);
  564 }
  565 
  566 /*
  567  * Filter outgoing IGMP report state by group.
  568  *
  569  * Reports are ALWAYS suppressed for ALL-HOSTS (224.0.0.1).
  570  * If the net.inet.igmp.sendlocal sysctl is 0, then IGMP reports are
  571  * disabled for all groups in the 224.0.0.0/24 link-local scope. However,
  572  * this may break certain IGMP snooping switches which rely on the old
  573  * report behaviour.
  574  *
  575  * Return zero if the given group is one for which IGMP reports
  576  * should be suppressed, or non-zero if reports should be issued.
  577  */
  578 static __inline int
  579 igmp_isgroupreported(const struct in_addr addr)
  580 {
  581 
  582         if (in_allhosts(addr) ||
  583             ((!V_igmp_sendlocal && IN_LOCAL_GROUP(ntohl(addr.s_addr)))))
  584                 return (0);
  585 
  586         return (1);
  587 }
  588 
  589 /*
  590  * Construct a Router Alert option to use in outgoing packets.
  591  */
  592 static struct mbuf *
  593 igmp_ra_alloc(void)
  594 {
  595         struct mbuf     *m;
  596         struct ipoption *p;
  597 
  598         m = m_get(M_WAITOK, MT_DATA);
  599         p = mtod(m, struct ipoption *);
  600         p->ipopt_dst.s_addr = INADDR_ANY;
  601         p->ipopt_list[0] = (char)IPOPT_RA;      /* Router Alert Option */
  602         p->ipopt_list[1] = 0x04;                /* 4 bytes long */
  603         p->ipopt_list[2] = IPOPT_EOL;           /* End of IP option list */
  604         p->ipopt_list[3] = 0x00;                /* pad byte */
  605         m->m_len = sizeof(p->ipopt_dst) + p->ipopt_list[1];
  606 
  607         return (m);
  608 }
  609 
  610 /*
  611  * Attach IGMP when PF_INET is attached to an interface.
  612  */
  613 struct igmp_ifsoftc *
  614 igmp_domifattach(struct ifnet *ifp)
  615 {
  616         struct igmp_ifsoftc *igi;
  617 
  618         CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)",
  619             __func__, ifp, ifp->if_xname);
  620 
  621         IGMP_LOCK();
  622 
  623         igi = igi_alloc_locked(ifp);
  624         if (!(ifp->if_flags & IFF_MULTICAST))
  625                 igi->igi_flags |= IGIF_SILENT;
  626 
  627         IGMP_UNLOCK();
  628 
  629         return (igi);
  630 }
  631 
  632 /*
  633  * VIMAGE: assume curvnet set by caller.
  634  */
  635 static struct igmp_ifsoftc *
  636 igi_alloc_locked(/*const*/ struct ifnet *ifp)
  637 {
  638         struct igmp_ifsoftc *igi;
  639 
  640         IGMP_LOCK_ASSERT();
  641 
  642         igi = malloc(sizeof(struct igmp_ifsoftc), M_IGMP, M_NOWAIT|M_ZERO);
  643         if (igi == NULL)
  644                 goto out;
  645 
  646         igi->igi_ifp = ifp;
  647         igi->igi_version = V_igmp_default_version;
  648         igi->igi_flags = 0;
  649         igi->igi_rv = IGMP_RV_INIT;
  650         igi->igi_qi = IGMP_QI_INIT;
  651         igi->igi_qri = IGMP_QRI_INIT;
  652         igi->igi_uri = IGMP_URI_INIT;
  653         mbufq_init(&igi->igi_gq, IGMP_MAX_RESPONSE_PACKETS);
  654 
  655         LIST_INSERT_HEAD(&V_igi_head, igi, igi_link);
  656 
  657         CTR2(KTR_IGMPV3, "allocate igmp_ifsoftc for ifp %p(%s)",
  658              ifp, ifp->if_xname);
  659 
  660 out:
  661         return (igi);
  662 }
  663 
  664 /*
  665  * Hook for ifdetach.
  666  *
  667  * NOTE: Some finalization tasks need to run before the protocol domain
  668  * is detached, but also before the link layer does its cleanup.
  669  *
  670  * SMPNG: igmp_ifdetach() needs to take IF_ADDR_LOCK().
  671  * XXX This is also bitten by unlocked ifma_protospec access.
  672  */
  673 void
  674 igmp_ifdetach(struct ifnet *ifp)
  675 {
  676         struct epoch_tracker     et;
  677         struct igmp_ifsoftc     *igi;
  678         struct ifmultiaddr      *ifma;
  679         struct in_multi         *inm;
  680         struct in_multi_head inm_free_tmp;
  681         CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)", __func__, ifp,
  682             ifp->if_xname);
  683 
  684         SLIST_INIT(&inm_free_tmp);
  685         IGMP_LOCK();
  686 
  687         igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
  688         if (igi->igi_version == IGMP_VERSION_3) {
  689                 IF_ADDR_WLOCK(ifp);
  690                 NET_EPOCH_ENTER(et);
  691                 CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
  692                         inm = inm_ifmultiaddr_get_inm(ifma);
  693                         if (inm == NULL)
  694                                 continue;
  695                         if (inm->inm_state == IGMP_LEAVING_MEMBER)
  696                                 inm_rele_locked(&inm_free_tmp, inm);
  697                         inm_clear_recorded(inm);
  698                 }
  699                 NET_EPOCH_EXIT(et);
  700                 IF_ADDR_WUNLOCK(ifp);
  701                 inm_release_list_deferred(&inm_free_tmp);
  702         }
  703         IGMP_UNLOCK();
  704 
  705 }
  706 
  707 /*
  708  * Hook for domifdetach.
  709  */
  710 void
  711 igmp_domifdetach(struct ifnet *ifp)
  712 {
  713 
  714         CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)",
  715             __func__, ifp, ifp->if_xname);
  716 
  717         IGMP_LOCK();
  718         igi_delete_locked(ifp);
  719         IGMP_UNLOCK();
  720 }
  721 
  722 static void
  723 igi_delete_locked(const struct ifnet *ifp)
  724 {
  725         struct igmp_ifsoftc *igi, *tigi;
  726 
  727         CTR3(KTR_IGMPV3, "%s: freeing igmp_ifsoftc for ifp %p(%s)",
  728             __func__, ifp, ifp->if_xname);
  729 
  730         IGMP_LOCK_ASSERT();
  731 
  732         LIST_FOREACH_SAFE(igi, &V_igi_head, igi_link, tigi) {
  733                 if (igi->igi_ifp == ifp) {
  734                         /*
  735                          * Free deferred General Query responses.
  736                          */
  737                         mbufq_drain(&igi->igi_gq);
  738 
  739                         LIST_REMOVE(igi, igi_link);
  740                         free(igi, M_IGMP);
  741                         return;
  742                 }
  743         }
  744 }
  745 
  746 /*
  747  * Process a received IGMPv1 query.
  748  * Return non-zero if the message should be dropped.
  749  *
  750  * VIMAGE: The curvnet pointer is derived from the input ifp.
  751  */
  752 static int
  753 igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip,
  754     const struct igmp *igmp)
  755 {
  756         struct ifmultiaddr      *ifma;
  757         struct igmp_ifsoftc     *igi;
  758         struct in_multi         *inm;
  759 
  760         NET_EPOCH_ASSERT();
  761 
  762         /*
  763          * IGMPv1 Host Mmembership Queries SHOULD always be addressed to
  764          * 224.0.0.1. They are always treated as General Queries.
  765          * igmp_group is always ignored. Do not drop it as a userland
  766          * daemon may wish to see it.
  767          * XXX SMPng: unlocked increments in igmpstat assumed atomic.
  768          */
  769         if (!in_allhosts(ip->ip_dst) || !in_nullhost(igmp->igmp_group)) {
  770                 IGMPSTAT_INC(igps_rcv_badqueries);
  771                 return (0);
  772         }
  773         IGMPSTAT_INC(igps_rcv_gen_queries);
  774 
  775         IN_MULTI_LIST_LOCK();
  776         IGMP_LOCK();
  777 
  778         igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
  779         KASSERT(igi != NULL, ("%s: no igmp_ifsoftc for ifp %p", __func__, ifp));
  780 
  781         if (igi->igi_flags & IGIF_LOOPBACK) {
  782                 CTR2(KTR_IGMPV3, "ignore v1 query on IGIF_LOOPBACK ifp %p(%s)",
  783                     ifp, ifp->if_xname);
  784                 goto out_locked;
  785         }
  786 
  787         /*
  788          * Switch to IGMPv1 host compatibility mode.
  789          */
  790         igmp_set_version(igi, IGMP_VERSION_1);
  791 
  792         CTR2(KTR_IGMPV3, "process v1 query on ifp %p(%s)", ifp, ifp->if_xname);
  793 
  794         /*
  795          * Start the timers in all of our group records
  796          * for the interface on which the query arrived,
  797          * except those which are already running.
  798          */
  799         CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
  800                 inm = inm_ifmultiaddr_get_inm(ifma);
  801                 if (inm == NULL)
  802                         continue;
  803                 if (inm->inm_timer != 0)
  804                         continue;
  805                 switch (inm->inm_state) {
  806                 case IGMP_NOT_MEMBER:
  807                 case IGMP_SILENT_MEMBER:
  808                         break;
  809                 case IGMP_G_QUERY_PENDING_MEMBER:
  810                 case IGMP_SG_QUERY_PENDING_MEMBER:
  811                 case IGMP_REPORTING_MEMBER:
  812                 case IGMP_IDLE_MEMBER:
  813                 case IGMP_LAZY_MEMBER:
  814                 case IGMP_SLEEPING_MEMBER:
  815                 case IGMP_AWAKENING_MEMBER:
  816                         inm->inm_state = IGMP_REPORTING_MEMBER;
  817                         inm->inm_timer = IGMP_RANDOM_DELAY(
  818                             IGMP_V1V2_MAX_RI * IGMP_FASTHZ);
  819                         V_current_state_timers_running = 1;
  820                         break;
  821                 case IGMP_LEAVING_MEMBER:
  822                         break;
  823                 }
  824         }
  825 
  826 out_locked:
  827         IGMP_UNLOCK();
  828         IN_MULTI_LIST_UNLOCK();
  829 
  830         return (0);
  831 }
  832 
  833 /*
  834  * Process a received IGMPv2 general or group-specific query.
  835  */
  836 static int
  837 igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip,
  838     const struct igmp *igmp)
  839 {
  840         struct ifmultiaddr      *ifma;
  841         struct igmp_ifsoftc     *igi;
  842         struct in_multi         *inm;
  843         int                      is_general_query;
  844         uint16_t                 timer;
  845 
  846         NET_EPOCH_ASSERT();
  847 
  848         is_general_query = 0;
  849 
  850         /*
  851          * Validate address fields upfront.
  852          * XXX SMPng: unlocked increments in igmpstat assumed atomic.
  853          */
  854         if (in_nullhost(igmp->igmp_group)) {
  855                 /*
  856                  * IGMPv2 General Query.
  857                  * If this was not sent to the all-hosts group, ignore it.
  858                  */
  859                 if (!in_allhosts(ip->ip_dst))
  860                         return (0);
  861                 IGMPSTAT_INC(igps_rcv_gen_queries);
  862                 is_general_query = 1;
  863         } else {
  864                 /* IGMPv2 Group-Specific Query. */
  865                 IGMPSTAT_INC(igps_rcv_group_queries);
  866         }
  867 
  868         IN_MULTI_LIST_LOCK();
  869         IGMP_LOCK();
  870 
  871         igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
  872         KASSERT(igi != NULL, ("%s: no igmp_ifsoftc for ifp %p", __func__, ifp));
  873 
  874         if (igi->igi_flags & IGIF_LOOPBACK) {
  875                 CTR2(KTR_IGMPV3, "ignore v2 query on IGIF_LOOPBACK ifp %p(%s)",
  876                     ifp, ifp->if_xname);
  877                 goto out_locked;
  878         }
  879 
  880         /*
  881          * Ignore v2 query if in v1 Compatibility Mode.
  882          */
  883         if (igi->igi_version == IGMP_VERSION_1)
  884                 goto out_locked;
  885 
  886         igmp_set_version(igi, IGMP_VERSION_2);
  887 
  888         timer = igmp->igmp_code * IGMP_FASTHZ / IGMP_TIMER_SCALE;
  889         if (timer == 0)
  890                 timer = 1;
  891 
  892         if (is_general_query) {
  893                 /*
  894                  * For each reporting group joined on this
  895                  * interface, kick the report timer.
  896                  */
  897                 CTR2(KTR_IGMPV3, "process v2 general query on ifp %p(%s)",
  898                     ifp, ifp->if_xname);
  899                 CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
  900                         inm = inm_ifmultiaddr_get_inm(ifma);
  901                         if (inm == NULL)
  902                                 continue;
  903                         igmp_v2_update_group(inm, timer);
  904                 }
  905         } else {
  906                 /*
  907                  * Group-specific IGMPv2 query, we need only
  908                  * look up the single group to process it.
  909                  */
  910                 inm = inm_lookup(ifp, igmp->igmp_group);
  911                 if (inm != NULL) {
  912                         CTR3(KTR_IGMPV3,
  913                             "process v2 query 0x%08x on ifp %p(%s)",
  914                             ntohl(igmp->igmp_group.s_addr), ifp, ifp->if_xname);
  915                         igmp_v2_update_group(inm, timer);
  916                 }
  917         }
  918 
  919 out_locked:
  920         IGMP_UNLOCK();
  921         IN_MULTI_LIST_UNLOCK();
  922 
  923         return (0);
  924 }
  925 
  926 /*
  927  * Update the report timer on a group in response to an IGMPv2 query.
  928  *
  929  * If we are becoming the reporting member for this group, start the timer.
  930  * If we already are the reporting member for this group, and timer is
  931  * below the threshold, reset it.
  932  *
  933  * We may be updating the group for the first time since we switched
  934  * to IGMPv3. If we are, then we must clear any recorded source lists,
  935  * and transition to REPORTING state; the group timer is overloaded
  936  * for group and group-source query responses.
  937  *
  938  * Unlike IGMPv3, the delay per group should be jittered
  939  * to avoid bursts of IGMPv2 reports.
  940  */
  941 static void
  942 igmp_v2_update_group(struct in_multi *inm, const int timer)
  943 {
  944 
  945         CTR4(KTR_IGMPV3, "0x%08x: %s/%s timer=%d", __func__,
  946             ntohl(inm->inm_addr.s_addr), inm->inm_ifp->if_xname, timer);
  947 
  948         IN_MULTI_LIST_LOCK_ASSERT();
  949 
  950         switch (inm->inm_state) {
  951         case IGMP_NOT_MEMBER:
  952         case IGMP_SILENT_MEMBER:
  953                 break;
  954         case IGMP_REPORTING_MEMBER:
  955                 if (inm->inm_timer != 0 &&
  956                     inm->inm_timer <= timer) {
  957                         CTR1(KTR_IGMPV3, "%s: REPORTING and timer running, "
  958                             "skipping.", __func__);
  959                         break;
  960                 }
  961                 /* FALLTHROUGH */
  962         case IGMP_SG_QUERY_PENDING_MEMBER:
  963         case IGMP_G_QUERY_PENDING_MEMBER:
  964         case IGMP_IDLE_MEMBER:
  965         case IGMP_LAZY_MEMBER:
  966         case IGMP_AWAKENING_MEMBER:
  967                 CTR1(KTR_IGMPV3, "%s: ->REPORTING", __func__);
  968                 inm->inm_state = IGMP_REPORTING_MEMBER;
  969                 inm->inm_timer = IGMP_RANDOM_DELAY(timer);
  970                 V_current_state_timers_running = 1;
  971                 break;
  972         case IGMP_SLEEPING_MEMBER:
  973                 CTR1(KTR_IGMPV3, "%s: ->AWAKENING", __func__);
  974                 inm->inm_state = IGMP_AWAKENING_MEMBER;
  975                 break;
  976         case IGMP_LEAVING_MEMBER:
  977                 break;
  978         }
  979 }
  980 
  981 /*
  982  * Process a received IGMPv3 general, group-specific or
  983  * group-and-source-specific query.
  984  * Assumes m has already been pulled up to the full IGMP message length.
  985  * Return 0 if successful, otherwise an appropriate error code is returned.
  986  */
  987 static int
  988 igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip,
  989     /*const*/ struct igmpv3 *igmpv3)
  990 {
  991         struct igmp_ifsoftc     *igi;
  992         struct in_multi         *inm;
  993         int                      is_general_query;
  994         uint32_t                 maxresp, nsrc, qqi;
  995         uint16_t                 timer;
  996         uint8_t                  qrv;
  997 
  998         is_general_query = 0;
  999 
 1000         CTR2(KTR_IGMPV3, "process v3 query on ifp %p(%s)", ifp, ifp->if_xname);
 1001 
 1002         maxresp = igmpv3->igmp_code;    /* in 1/10ths of a second */
 1003         if (maxresp >= 128) {
 1004                 maxresp = IGMP_MANT(igmpv3->igmp_code) <<
 1005                           (IGMP_EXP(igmpv3->igmp_code) + 3);
 1006         }
 1007 
 1008         /*
 1009          * Robustness must never be less than 2 for on-wire IGMPv3.
 1010          * FUTURE: Check if ifp has IGIF_LOOPBACK set, as we will make
 1011          * an exception for interfaces whose IGMPv3 state changes
 1012          * are redirected to loopback (e.g. MANET).
 1013          */
 1014         qrv = IGMP_QRV(igmpv3->igmp_misc);
 1015         if (qrv < 2) {
 1016                 CTR3(KTR_IGMPV3, "%s: clamping qrv %d to %d", __func__,
 1017                     qrv, IGMP_RV_INIT);
 1018                 qrv = IGMP_RV_INIT;
 1019         }
 1020 
 1021         qqi = igmpv3->igmp_qqi;
 1022         if (qqi >= 128) {
 1023                 qqi = IGMP_MANT(igmpv3->igmp_qqi) <<
 1024                      (IGMP_EXP(igmpv3->igmp_qqi) + 3);
 1025         }
 1026 
 1027         timer = maxresp * IGMP_FASTHZ / IGMP_TIMER_SCALE;
 1028         if (timer == 0)
 1029                 timer = 1;
 1030 
 1031         nsrc = ntohs(igmpv3->igmp_numsrc);
 1032 
 1033         /*
 1034          * Validate address fields and versions upfront before
 1035          * accepting v3 query.
 1036          * XXX SMPng: Unlocked access to igmpstat counters here.
 1037          */
 1038         if (in_nullhost(igmpv3->igmp_group)) {
 1039                 /*
 1040                  * IGMPv3 General Query.
 1041                  *
 1042                  * General Queries SHOULD be directed to 224.0.0.1.
 1043                  * A general query with a source list has undefined
 1044                  * behaviour; discard it.
 1045                  */
 1046                 IGMPSTAT_INC(igps_rcv_gen_queries);
 1047                 if (!in_allhosts(ip->ip_dst) || nsrc > 0) {
 1048                         IGMPSTAT_INC(igps_rcv_badqueries);
 1049                         return (0);
 1050                 }
 1051                 is_general_query = 1;
 1052         } else {
 1053                 /* Group or group-source specific query. */
 1054                 if (nsrc == 0)
 1055                         IGMPSTAT_INC(igps_rcv_group_queries);
 1056                 else
 1057                         IGMPSTAT_INC(igps_rcv_gsr_queries);
 1058         }
 1059 
 1060         IN_MULTI_LIST_LOCK();
 1061         IGMP_LOCK();
 1062 
 1063         igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
 1064         KASSERT(igi != NULL, ("%s: no igmp_ifsoftc for ifp %p", __func__, ifp));
 1065 
 1066         if (igi->igi_flags & IGIF_LOOPBACK) {
 1067                 CTR2(KTR_IGMPV3, "ignore v3 query on IGIF_LOOPBACK ifp %p(%s)",
 1068                     ifp, ifp->if_xname);
 1069                 goto out_locked;
 1070         }
 1071 
 1072         /*
 1073          * Discard the v3 query if we're in Compatibility Mode.
 1074          * The RFC is not obviously worded that hosts need to stay in
 1075          * compatibility mode until the Old Version Querier Present
 1076          * timer expires.
 1077          */
 1078         if (igi->igi_version != IGMP_VERSION_3) {
 1079                 CTR3(KTR_IGMPV3, "ignore v3 query in v%d mode on ifp %p(%s)",
 1080                     igi->igi_version, ifp, ifp->if_xname);
 1081                 goto out_locked;
 1082         }
 1083 
 1084         igmp_set_version(igi, IGMP_VERSION_3);
 1085         igi->igi_rv = qrv;
 1086         igi->igi_qi = qqi;
 1087         igi->igi_qri = maxresp;
 1088 
 1089         CTR4(KTR_IGMPV3, "%s: qrv %d qi %d qri %d", __func__, qrv, qqi,
 1090             maxresp);
 1091 
 1092         if (is_general_query) {
 1093                 /*
 1094                  * Schedule a current-state report on this ifp for
 1095                  * all groups, possibly containing source lists.
 1096                  * If there is a pending General Query response
 1097                  * scheduled earlier than the selected delay, do
 1098                  * not schedule any other reports.
 1099                  * Otherwise, reset the interface timer.
 1100                  */
 1101                 CTR2(KTR_IGMPV3, "process v3 general query on ifp %p(%s)",
 1102                     ifp, ifp->if_xname);
 1103                 if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer) {
 1104                         igi->igi_v3_timer = IGMP_RANDOM_DELAY(timer);
 1105                         V_interface_timers_running = 1;
 1106                 }
 1107         } else {
 1108                 /*
 1109                  * Group-source-specific queries are throttled on
 1110                  * a per-group basis to defeat denial-of-service attempts.
 1111                  * Queries for groups we are not a member of on this
 1112                  * link are simply ignored.
 1113                  */
 1114                 inm = inm_lookup(ifp, igmpv3->igmp_group);
 1115                 if (inm == NULL)
 1116                         goto out_locked;
 1117                 if (nsrc > 0) {
 1118                         if (!ratecheck(&inm->inm_lastgsrtv,
 1119                             &V_igmp_gsrdelay)) {
 1120                                 CTR1(KTR_IGMPV3, "%s: GS query throttled.",
 1121                                     __func__);
 1122                                 IGMPSTAT_INC(igps_drop_gsr_queries);
 1123                                 goto out_locked;
 1124                         }
 1125                 }
 1126                 CTR3(KTR_IGMPV3, "process v3 0x%08x query on ifp %p(%s)",
 1127                      ntohl(igmpv3->igmp_group.s_addr), ifp, ifp->if_xname);
 1128                 /*
 1129                  * If there is a pending General Query response
 1130                  * scheduled sooner than the selected delay, no
 1131                  * further report need be scheduled.
 1132                  * Otherwise, prepare to respond to the
 1133                  * group-specific or group-and-source query.
 1134                  */
 1135                 if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer)
 1136                         igmp_input_v3_group_query(inm, igi, timer, igmpv3);
 1137         }
 1138 
 1139 out_locked:
 1140         IGMP_UNLOCK();
 1141         IN_MULTI_LIST_UNLOCK();
 1142 
 1143         return (0);
 1144 }
 1145 
 1146 /*
 1147  * Process a received IGMPv3 group-specific or group-and-source-specific
 1148  * query.
 1149  * Return <0 if any error occurred. Currently this is ignored.
 1150  */
 1151 static int
 1152 igmp_input_v3_group_query(struct in_multi *inm, struct igmp_ifsoftc *igi,
 1153     int timer, /*const*/ struct igmpv3 *igmpv3)
 1154 {
 1155         int                      retval;
 1156         uint16_t                 nsrc;
 1157 
 1158         IN_MULTI_LIST_LOCK_ASSERT();
 1159         IGMP_LOCK_ASSERT();
 1160 
 1161         retval = 0;
 1162 
 1163         switch (inm->inm_state) {
 1164         case IGMP_NOT_MEMBER:
 1165         case IGMP_SILENT_MEMBER:
 1166         case IGMP_SLEEPING_MEMBER:
 1167         case IGMP_LAZY_MEMBER:
 1168         case IGMP_AWAKENING_MEMBER:
 1169         case IGMP_IDLE_MEMBER:
 1170         case IGMP_LEAVING_MEMBER:
 1171                 return (retval);
 1172                 break;
 1173         case IGMP_REPORTING_MEMBER:
 1174         case IGMP_G_QUERY_PENDING_MEMBER:
 1175         case IGMP_SG_QUERY_PENDING_MEMBER:
 1176                 break;
 1177         }
 1178 
 1179         nsrc = ntohs(igmpv3->igmp_numsrc);
 1180 
 1181         /*
 1182          * Deal with group-specific queries upfront.
 1183          * If any group query is already pending, purge any recorded
 1184          * source-list state if it exists, and schedule a query response
 1185          * for this group-specific query.
 1186          */
 1187         if (nsrc == 0) {
 1188                 if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
 1189                     inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
 1190                         inm_clear_recorded(inm);
 1191                         timer = min(inm->inm_timer, timer);
 1192                 }
 1193                 inm->inm_state = IGMP_G_QUERY_PENDING_MEMBER;
 1194                 inm->inm_timer = IGMP_RANDOM_DELAY(timer);
 1195                 V_current_state_timers_running = 1;
 1196                 return (retval);
 1197         }
 1198 
 1199         /*
 1200          * Deal with the case where a group-and-source-specific query has
 1201          * been received but a group-specific query is already pending.
 1202          */
 1203         if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER) {
 1204                 timer = min(inm->inm_timer, timer);
 1205                 inm->inm_timer = IGMP_RANDOM_DELAY(timer);
 1206                 V_current_state_timers_running = 1;
 1207                 return (retval);
 1208         }
 1209 
 1210         /*
 1211          * Finally, deal with the case where a group-and-source-specific
 1212          * query has been received, where a response to a previous g-s-r
 1213          * query exists, or none exists.
 1214          * In this case, we need to parse the source-list which the Querier
 1215          * has provided us with and check if we have any source list filter
 1216          * entries at T1 for these sources. If we do not, there is no need
 1217          * schedule a report and the query may be dropped.
 1218          * If we do, we must record them and schedule a current-state
 1219          * report for those sources.
 1220          * FIXME: Handling source lists larger than 1 mbuf requires that
 1221          * we pass the mbuf chain pointer down to this function, and use
 1222          * m_getptr() to walk the chain.
 1223          */
 1224         if (inm->inm_nsrc > 0) {
 1225                 const struct in_addr    *ap;
 1226                 int                      i, nrecorded;
 1227 
 1228                 ap = (const struct in_addr *)(igmpv3 + 1);
 1229                 nrecorded = 0;
 1230                 for (i = 0; i < nsrc; i++, ap++) {
 1231                         retval = inm_record_source(inm, ap->s_addr);
 1232                         if (retval < 0)
 1233                                 break;
 1234                         nrecorded += retval;
 1235                 }
 1236                 if (nrecorded > 0) {
 1237                         CTR1(KTR_IGMPV3,
 1238                             "%s: schedule response to SG query", __func__);
 1239                         inm->inm_state = IGMP_SG_QUERY_PENDING_MEMBER;
 1240                         inm->inm_timer = IGMP_RANDOM_DELAY(timer);
 1241                         V_current_state_timers_running = 1;
 1242                 }
 1243         }
 1244 
 1245         return (retval);
 1246 }
 1247 
 1248 /*
 1249  * Process a received IGMPv1 host membership report.
 1250  *
 1251  * NOTE: 0.0.0.0 workaround breaks const correctness.
 1252  */
 1253 static int
 1254 igmp_input_v1_report(struct ifnet *ifp, /*const*/ struct ip *ip,
 1255     /*const*/ struct igmp *igmp)
 1256 {
 1257         struct in_ifaddr *ia;
 1258         struct in_multi *inm;
 1259 
 1260         IGMPSTAT_INC(igps_rcv_reports);
 1261 
 1262         if (ifp->if_flags & IFF_LOOPBACK)
 1263                 return (0);
 1264 
 1265         if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) ||
 1266             !in_hosteq(igmp->igmp_group, ip->ip_dst)) {
 1267                 IGMPSTAT_INC(igps_rcv_badreports);
 1268                 return (EINVAL);
 1269         }
 1270 
 1271         /*
 1272          * RFC 3376, Section 4.2.13, 9.2, 9.3:
 1273          * Booting clients may use the source address 0.0.0.0. Some
 1274          * IGMP daemons may not know how to use IP_RECVIF to determine
 1275          * the interface upon which this message was received.
 1276          * Replace 0.0.0.0 with the subnet address if told to do so.
 1277          */
 1278         if (V_igmp_recvifkludge && in_nullhost(ip->ip_src)) {
 1279                 IFP_TO_IA(ifp, ia);
 1280                 if (ia != NULL)
 1281                         ip->ip_src.s_addr = htonl(ia->ia_subnet);
 1282         }
 1283 
 1284         CTR3(KTR_IGMPV3, "process v1 report 0x%08x on ifp %p(%s)",
 1285              ntohl(igmp->igmp_group.s_addr), ifp, ifp->if_xname);
 1286 
 1287         /*
 1288          * IGMPv1 report suppression.
 1289          * If we are a member of this group, and our membership should be
 1290          * reported, stop our group timer and transition to the 'lazy' state.
 1291          */
 1292         IN_MULTI_LIST_LOCK();
 1293         inm = inm_lookup(ifp, igmp->igmp_group);
 1294         if (inm != NULL) {
 1295                 struct igmp_ifsoftc *igi;
 1296 
 1297                 igi = inm->inm_igi;
 1298                 if (igi == NULL) {
 1299                         KASSERT(igi != NULL,
 1300                             ("%s: no igi for ifp %p", __func__, ifp));
 1301                         goto out_locked;
 1302                 }
 1303 
 1304                 IGMPSTAT_INC(igps_rcv_ourreports);
 1305 
 1306                 /*
 1307                  * If we are in IGMPv3 host mode, do not allow the
 1308                  * other host's IGMPv1 report to suppress our reports
 1309                  * unless explicitly configured to do so.
 1310                  */
 1311                 if (igi->igi_version == IGMP_VERSION_3) {
 1312                         if (V_igmp_legacysupp)
 1313                                 igmp_v3_suppress_group_record(inm);
 1314                         goto out_locked;
 1315                 }
 1316 
 1317                 inm->inm_timer = 0;
 1318 
 1319                 switch (inm->inm_state) {
 1320                 case IGMP_NOT_MEMBER:
 1321                 case IGMP_SILENT_MEMBER:
 1322                         break;
 1323                 case IGMP_IDLE_MEMBER:
 1324                 case IGMP_LAZY_MEMBER:
 1325                 case IGMP_AWAKENING_MEMBER:
 1326                         CTR3(KTR_IGMPV3,
 1327                             "report suppressed for 0x%08x on ifp %p(%s)",
 1328                             ntohl(igmp->igmp_group.s_addr), ifp,
 1329                             ifp->if_xname);
 1330                 case IGMP_SLEEPING_MEMBER:
 1331                         inm->inm_state = IGMP_SLEEPING_MEMBER;
 1332                         break;
 1333                 case IGMP_REPORTING_MEMBER:
 1334                         CTR3(KTR_IGMPV3,
 1335                             "report suppressed for 0x%08x on ifp %p(%s)",
 1336                             ntohl(igmp->igmp_group.s_addr), ifp,
 1337                             ifp->if_xname);
 1338                         if (igi->igi_version == IGMP_VERSION_1)
 1339                                 inm->inm_state = IGMP_LAZY_MEMBER;
 1340                         else if (igi->igi_version == IGMP_VERSION_2)
 1341                                 inm->inm_state = IGMP_SLEEPING_MEMBER;
 1342                         break;
 1343                 case IGMP_G_QUERY_PENDING_MEMBER:
 1344                 case IGMP_SG_QUERY_PENDING_MEMBER:
 1345                 case IGMP_LEAVING_MEMBER:
 1346                         break;
 1347                 }
 1348         }
 1349 
 1350 out_locked:
 1351         IN_MULTI_LIST_UNLOCK();
 1352 
 1353         return (0);
 1354 }
 1355 
 1356 /*
 1357  * Process a received IGMPv2 host membership report.
 1358  *
 1359  * NOTE: 0.0.0.0 workaround breaks const correctness.
 1360  */
 1361 static int
 1362 igmp_input_v2_report(struct ifnet *ifp, /*const*/ struct ip *ip,
 1363     /*const*/ struct igmp *igmp)
 1364 {
 1365         struct in_ifaddr *ia;
 1366         struct in_multi *inm;
 1367 
 1368         /*
 1369          * Make sure we don't hear our own membership report.  Fast
 1370          * leave requires knowing that we are the only member of a
 1371          * group.
 1372          */
 1373         IFP_TO_IA(ifp, ia);
 1374         if (ia != NULL && in_hosteq(ip->ip_src, IA_SIN(ia)->sin_addr)) {
 1375                 return (0);
 1376         }
 1377 
 1378         IGMPSTAT_INC(igps_rcv_reports);
 1379 
 1380         if (ifp->if_flags & IFF_LOOPBACK) {
 1381                 return (0);
 1382         }
 1383 
 1384         if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) ||
 1385             !in_hosteq(igmp->igmp_group, ip->ip_dst)) {
 1386                 IGMPSTAT_INC(igps_rcv_badreports);
 1387                 return (EINVAL);
 1388         }
 1389 
 1390         /*
 1391          * RFC 3376, Section 4.2.13, 9.2, 9.3:
 1392          * Booting clients may use the source address 0.0.0.0. Some
 1393          * IGMP daemons may not know how to use IP_RECVIF to determine
 1394          * the interface upon which this message was received.
 1395          * Replace 0.0.0.0 with the subnet address if told to do so.
 1396          */
 1397         if (V_igmp_recvifkludge && in_nullhost(ip->ip_src)) {
 1398                 if (ia != NULL)
 1399                         ip->ip_src.s_addr = htonl(ia->ia_subnet);
 1400         }
 1401 
 1402         CTR3(KTR_IGMPV3, "process v2 report 0x%08x on ifp %p(%s)",
 1403              ntohl(igmp->igmp_group.s_addr), ifp, ifp->if_xname);
 1404 
 1405         /*
 1406          * IGMPv2 report suppression.
 1407          * If we are a member of this group, and our membership should be
 1408          * reported, and our group timer is pending or about to be reset,
 1409          * stop our group timer by transitioning to the 'lazy' state.
 1410          */
 1411         IN_MULTI_LIST_LOCK();
 1412         inm = inm_lookup(ifp, igmp->igmp_group);
 1413         if (inm != NULL) {
 1414                 struct igmp_ifsoftc *igi;
 1415 
 1416                 igi = inm->inm_igi;
 1417                 KASSERT(igi != NULL, ("%s: no igi for ifp %p", __func__, ifp));
 1418 
 1419                 IGMPSTAT_INC(igps_rcv_ourreports);
 1420 
 1421                 /*
 1422                  * If we are in IGMPv3 host mode, do not allow the
 1423                  * other host's IGMPv1 report to suppress our reports
 1424                  * unless explicitly configured to do so.
 1425                  */
 1426                 if (igi->igi_version == IGMP_VERSION_3) {
 1427                         if (V_igmp_legacysupp)
 1428                                 igmp_v3_suppress_group_record(inm);
 1429                         goto out_locked;
 1430                 }
 1431 
 1432                 inm->inm_timer = 0;
 1433 
 1434                 switch (inm->inm_state) {
 1435                 case IGMP_NOT_MEMBER:
 1436                 case IGMP_SILENT_MEMBER:
 1437                 case IGMP_SLEEPING_MEMBER:
 1438                         break;
 1439                 case IGMP_REPORTING_MEMBER:
 1440                 case IGMP_IDLE_MEMBER:
 1441                 case IGMP_AWAKENING_MEMBER:
 1442                         CTR3(KTR_IGMPV3,
 1443                             "report suppressed for 0x%08x on ifp %p(%s)",
 1444                             ntohl(igmp->igmp_group.s_addr), ifp, ifp->if_xname);
 1445                 case IGMP_LAZY_MEMBER:
 1446                         inm->inm_state = IGMP_LAZY_MEMBER;
 1447                         break;
 1448                 case IGMP_G_QUERY_PENDING_MEMBER:
 1449                 case IGMP_SG_QUERY_PENDING_MEMBER:
 1450                 case IGMP_LEAVING_MEMBER:
 1451                         break;
 1452                 }
 1453         }
 1454 
 1455 out_locked:
 1456         IN_MULTI_LIST_UNLOCK();
 1457 
 1458         return (0);
 1459 }
 1460 
 1461 int
 1462 igmp_input(struct mbuf **mp, int *offp, int proto)
 1463 {
 1464         int iphlen;
 1465         struct ifnet *ifp;
 1466         struct igmp *igmp;
 1467         struct ip *ip;
 1468         struct mbuf *m;
 1469         int igmplen;
 1470         int minlen;
 1471         int queryver;
 1472 
 1473         CTR3(KTR_IGMPV3, "%s: called w/mbuf (%p,%d)", __func__, *mp, *offp);
 1474 
 1475         m = *mp;
 1476         ifp = m->m_pkthdr.rcvif;
 1477         *mp = NULL;
 1478 
 1479         IGMPSTAT_INC(igps_rcv_total);
 1480 
 1481         ip = mtod(m, struct ip *);
 1482         iphlen = *offp;
 1483         igmplen = ntohs(ip->ip_len) - iphlen;
 1484 
 1485         /*
 1486          * Validate lengths.
 1487          */
 1488         if (igmplen < IGMP_MINLEN) {
 1489                 IGMPSTAT_INC(igps_rcv_tooshort);
 1490                 m_freem(m);
 1491                 return (IPPROTO_DONE);
 1492         }
 1493 
 1494         /*
 1495          * Always pullup to the minimum size for v1/v2 or v3
 1496          * to amortize calls to m_pullup().
 1497          */
 1498         minlen = iphlen;
 1499         if (igmplen >= IGMP_V3_QUERY_MINLEN)
 1500                 minlen += IGMP_V3_QUERY_MINLEN;
 1501         else
 1502                 minlen += IGMP_MINLEN;
 1503         if ((!M_WRITABLE(m) || m->m_len < minlen) &&
 1504             (m = m_pullup(m, minlen)) == NULL) {
 1505                 IGMPSTAT_INC(igps_rcv_tooshort);
 1506                 return (IPPROTO_DONE);
 1507         }
 1508         ip = mtod(m, struct ip *);
 1509 
 1510         /*
 1511          * Validate checksum.
 1512          */
 1513         m->m_data += iphlen;
 1514         m->m_len -= iphlen;
 1515         igmp = mtod(m, struct igmp *);
 1516         if (in_cksum(m, igmplen)) {
 1517                 IGMPSTAT_INC(igps_rcv_badsum);
 1518                 m_freem(m);
 1519                 return (IPPROTO_DONE);
 1520         }
 1521         m->m_data -= iphlen;
 1522         m->m_len += iphlen;
 1523 
 1524         /*
 1525          * IGMP control traffic is link-scope, and must have a TTL of 1.
 1526          * DVMRP traffic (e.g. mrinfo, mtrace) is an exception;
 1527          * probe packets may come from beyond the LAN.
 1528          */
 1529         if (igmp->igmp_type != IGMP_DVMRP && ip->ip_ttl != 1) {
 1530                 IGMPSTAT_INC(igps_rcv_badttl);
 1531                 m_freem(m);
 1532                 return (IPPROTO_DONE);
 1533         }
 1534 
 1535         switch (igmp->igmp_type) {
 1536         case IGMP_HOST_MEMBERSHIP_QUERY:
 1537                 if (igmplen == IGMP_MINLEN) {
 1538                         if (igmp->igmp_code == 0)
 1539                                 queryver = IGMP_VERSION_1;
 1540                         else
 1541                                 queryver = IGMP_VERSION_2;
 1542                 } else if (igmplen >= IGMP_V3_QUERY_MINLEN) {
 1543                         queryver = IGMP_VERSION_3;
 1544                 } else {
 1545                         IGMPSTAT_INC(igps_rcv_tooshort);
 1546                         m_freem(m);
 1547                         return (IPPROTO_DONE);
 1548                 }
 1549 
 1550                 switch (queryver) {
 1551                 case IGMP_VERSION_1:
 1552                         IGMPSTAT_INC(igps_rcv_v1v2_queries);
 1553                         if (!V_igmp_v1enable)
 1554                                 break;
 1555                         if (igmp_input_v1_query(ifp, ip, igmp) != 0) {
 1556                                 m_freem(m);
 1557                                 return (IPPROTO_DONE);
 1558                         }
 1559                         break;
 1560 
 1561                 case IGMP_VERSION_2:
 1562                         IGMPSTAT_INC(igps_rcv_v1v2_queries);
 1563                         if (!V_igmp_v2enable)
 1564                                 break;
 1565                         if (igmp_input_v2_query(ifp, ip, igmp) != 0) {
 1566                                 m_freem(m);
 1567                                 return (IPPROTO_DONE);
 1568                         }
 1569                         break;
 1570 
 1571                 case IGMP_VERSION_3: {
 1572                                 struct igmpv3 *igmpv3;
 1573                                 uint16_t igmpv3len;
 1574                                 uint16_t nsrc;
 1575 
 1576                                 IGMPSTAT_INC(igps_rcv_v3_queries);
 1577                                 igmpv3 = (struct igmpv3 *)igmp;
 1578                                 /*
 1579                                  * Validate length based on source count.
 1580                                  */
 1581                                 nsrc = ntohs(igmpv3->igmp_numsrc);
 1582                                 if (nsrc * sizeof(in_addr_t) >
 1583                                     UINT16_MAX - iphlen - IGMP_V3_QUERY_MINLEN) {
 1584                                         IGMPSTAT_INC(igps_rcv_tooshort);
 1585                                         m_freem(m);
 1586                                         return (IPPROTO_DONE);
 1587                                 }
 1588                                 /*
 1589                                  * m_pullup() may modify m, so pullup in
 1590                                  * this scope.
 1591                                  */
 1592                                 igmpv3len = iphlen + IGMP_V3_QUERY_MINLEN +
 1593                                    sizeof(struct in_addr) * nsrc;
 1594                                 if ((!M_WRITABLE(m) ||
 1595                                      m->m_len < igmpv3len) &&
 1596                                     (m = m_pullup(m, igmpv3len)) == NULL) {
 1597                                         IGMPSTAT_INC(igps_rcv_tooshort);
 1598                                         return (IPPROTO_DONE);
 1599                                 }
 1600                                 igmpv3 = (struct igmpv3 *)(mtod(m, uint8_t *)
 1601                                     + iphlen);
 1602                                 if (igmp_input_v3_query(ifp, ip, igmpv3) != 0) {
 1603                                         m_freem(m);
 1604                                         return (IPPROTO_DONE);
 1605                                 }
 1606                         }
 1607                         break;
 1608                 }
 1609                 break;
 1610 
 1611         case IGMP_v1_HOST_MEMBERSHIP_REPORT:
 1612                 if (!V_igmp_v1enable)
 1613                         break;
 1614                 if (igmp_input_v1_report(ifp, ip, igmp) != 0) {
 1615                         m_freem(m);
 1616                         return (IPPROTO_DONE);
 1617                 }
 1618                 break;
 1619 
 1620         case IGMP_v2_HOST_MEMBERSHIP_REPORT:
 1621                 if (!V_igmp_v2enable)
 1622                         break;
 1623                 if (!ip_checkrouteralert(m))
 1624                         IGMPSTAT_INC(igps_rcv_nora);
 1625                 if (igmp_input_v2_report(ifp, ip, igmp) != 0) {
 1626                         m_freem(m);
 1627                         return (IPPROTO_DONE);
 1628                 }
 1629                 break;
 1630 
 1631         case IGMP_v3_HOST_MEMBERSHIP_REPORT:
 1632                 /*
 1633                  * Hosts do not need to process IGMPv3 membership reports,
 1634                  * as report suppression is no longer required.
 1635                  */
 1636                 if (!ip_checkrouteralert(m))
 1637                         IGMPSTAT_INC(igps_rcv_nora);
 1638                 break;
 1639 
 1640         default:
 1641                 break;
 1642         }
 1643 
 1644         /*
 1645          * Pass all valid IGMP packets up to any process(es) listening on a
 1646          * raw IGMP socket.
 1647          */
 1648         *mp = m;
 1649         return (rip_input(mp, offp, proto));
 1650 }
 1651 
 1652 /*
 1653  * Fast timeout handler (global).
 1654  * VIMAGE: Timeout handlers are expected to service all vimages.
 1655  */
 1656 static struct callout igmpfast_callout;
 1657 static void
 1658 igmp_fasttimo(void *arg __unused)
 1659 {
 1660         struct epoch_tracker et;
 1661         VNET_ITERATOR_DECL(vnet_iter);
 1662 
 1663         NET_EPOCH_ENTER(et);
 1664         VNET_LIST_RLOCK_NOSLEEP();
 1665         VNET_FOREACH(vnet_iter) {
 1666                 CURVNET_SET(vnet_iter);
 1667                 igmp_fasttimo_vnet();
 1668                 CURVNET_RESTORE();
 1669         }
 1670         VNET_LIST_RUNLOCK_NOSLEEP();
 1671         NET_EPOCH_EXIT(et);
 1672 
 1673         callout_reset(&igmpfast_callout, hz / IGMP_FASTHZ, igmp_fasttimo, NULL);
 1674 }
 1675 
 1676 /*
 1677  * Fast timeout handler (per-vnet).
 1678  * Sends are shuffled off to a netisr to deal with Giant.
 1679  *
 1680  * VIMAGE: Assume caller has set up our curvnet.
 1681  */
 1682 static void
 1683 igmp_fasttimo_vnet(void)
 1684 {
 1685         struct mbufq             scq;   /* State-change packets */
 1686         struct mbufq             qrq;   /* Query response packets */
 1687         struct ifnet            *ifp;
 1688         struct igmp_ifsoftc     *igi;
 1689         struct ifmultiaddr      *ifma;
 1690         struct in_multi         *inm;
 1691         struct in_multi_head inm_free_tmp;
 1692         int                      loop, uri_fasthz;
 1693 
 1694         loop = 0;
 1695         uri_fasthz = 0;
 1696 
 1697         /*
 1698          * Quick check to see if any work needs to be done, in order to
 1699          * minimize the overhead of fasttimo processing.
 1700          * SMPng: XXX Unlocked reads.
 1701          */
 1702         if (!V_current_state_timers_running &&
 1703             !V_interface_timers_running &&
 1704             !V_state_change_timers_running)
 1705                 return;
 1706 
 1707         SLIST_INIT(&inm_free_tmp);
 1708         IN_MULTI_LIST_LOCK();
 1709         IGMP_LOCK();
 1710 
 1711         /*
 1712          * IGMPv3 General Query response timer processing.
 1713          */
 1714         if (V_interface_timers_running) {
 1715                 CTR1(KTR_IGMPV3, "%s: interface timers running", __func__);
 1716 
 1717                 V_interface_timers_running = 0;
 1718                 LIST_FOREACH(igi, &V_igi_head, igi_link) {
 1719                         if (igi->igi_v3_timer == 0) {
 1720                                 /* Do nothing. */
 1721                         } else if (--igi->igi_v3_timer == 0) {
 1722                                 igmp_v3_dispatch_general_query(igi);
 1723                         } else {
 1724                                 V_interface_timers_running = 1;
 1725                         }
 1726                 }
 1727         }
 1728 
 1729         if (!V_current_state_timers_running &&
 1730             !V_state_change_timers_running)
 1731                 goto out_locked;
 1732 
 1733         V_current_state_timers_running = 0;
 1734         V_state_change_timers_running = 0;
 1735 
 1736         CTR1(KTR_IGMPV3, "%s: state change timers running", __func__);
 1737 
 1738         /*
 1739          * IGMPv1/v2/v3 host report and state-change timer processing.
 1740          * Note: Processing a v3 group timer may remove a node.
 1741          */
 1742         LIST_FOREACH(igi, &V_igi_head, igi_link) {
 1743                 ifp = igi->igi_ifp;
 1744 
 1745                 if (igi->igi_version == IGMP_VERSION_3) {
 1746                         loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
 1747                         uri_fasthz = IGMP_RANDOM_DELAY(igi->igi_uri *
 1748                             IGMP_FASTHZ);
 1749                         mbufq_init(&qrq, IGMP_MAX_G_GS_PACKETS);
 1750                         mbufq_init(&scq, IGMP_MAX_STATE_CHANGE_PACKETS);
 1751                 }
 1752 
 1753                 IF_ADDR_WLOCK(ifp);
 1754                 CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 1755                         inm = inm_ifmultiaddr_get_inm(ifma);
 1756                         if (inm == NULL)
 1757                                 continue;
 1758                         switch (igi->igi_version) {
 1759                         case IGMP_VERSION_1:
 1760                         case IGMP_VERSION_2:
 1761                                 igmp_v1v2_process_group_timer(inm,
 1762                                     igi->igi_version);
 1763                                 break;
 1764                         case IGMP_VERSION_3:
 1765                                 igmp_v3_process_group_timers(&inm_free_tmp, &qrq,
 1766                                     &scq, inm, uri_fasthz);
 1767                                 break;
 1768                         }
 1769                 }
 1770                 IF_ADDR_WUNLOCK(ifp);
 1771 
 1772                 if (igi->igi_version == IGMP_VERSION_3) {
 1773                         igmp_dispatch_queue(&qrq, 0, loop);
 1774                         igmp_dispatch_queue(&scq, 0, loop);
 1775 
 1776                         /*
 1777                          * Free the in_multi reference(s) for this
 1778                          * IGMP lifecycle.
 1779                          */
 1780                         inm_release_list_deferred(&inm_free_tmp);
 1781                 }
 1782         }
 1783 
 1784 out_locked:
 1785         IGMP_UNLOCK();
 1786         IN_MULTI_LIST_UNLOCK();
 1787 }
 1788 
 1789 /*
 1790  * Update host report group timer for IGMPv1/v2.
 1791  * Will update the global pending timer flags.
 1792  */
 1793 static void
 1794 igmp_v1v2_process_group_timer(struct in_multi *inm, const int version)
 1795 {
 1796         int report_timer_expired;
 1797 
 1798         IN_MULTI_LIST_LOCK_ASSERT();
 1799         IGMP_LOCK_ASSERT();
 1800 
 1801         if (inm->inm_timer == 0) {
 1802                 report_timer_expired = 0;
 1803         } else if (--inm->inm_timer == 0) {
 1804                 report_timer_expired = 1;
 1805         } else {
 1806                 V_current_state_timers_running = 1;
 1807                 return;
 1808         }
 1809 
 1810         switch (inm->inm_state) {
 1811         case IGMP_NOT_MEMBER:
 1812         case IGMP_SILENT_MEMBER:
 1813         case IGMP_IDLE_MEMBER:
 1814         case IGMP_LAZY_MEMBER:
 1815         case IGMP_SLEEPING_MEMBER:
 1816         case IGMP_AWAKENING_MEMBER:
 1817                 break;
 1818         case IGMP_REPORTING_MEMBER:
 1819                 if (report_timer_expired) {
 1820                         inm->inm_state = IGMP_IDLE_MEMBER;
 1821                         (void)igmp_v1v2_queue_report(inm,
 1822                             (version == IGMP_VERSION_2) ?
 1823                              IGMP_v2_HOST_MEMBERSHIP_REPORT :
 1824                              IGMP_v1_HOST_MEMBERSHIP_REPORT);
 1825                 }
 1826                 break;
 1827         case IGMP_G_QUERY_PENDING_MEMBER:
 1828         case IGMP_SG_QUERY_PENDING_MEMBER:
 1829         case IGMP_LEAVING_MEMBER:
 1830                 break;
 1831         }
 1832 }
 1833 
 1834 /*
 1835  * Update a group's timers for IGMPv3.
 1836  * Will update the global pending timer flags.
 1837  * Note: Unlocked read from igi.
 1838  */
 1839 static void
 1840 igmp_v3_process_group_timers(struct in_multi_head *inmh,
 1841     struct mbufq *qrq, struct mbufq *scq,
 1842     struct in_multi *inm, const int uri_fasthz)
 1843 {
 1844         int query_response_timer_expired;
 1845         int state_change_retransmit_timer_expired;
 1846 
 1847         IN_MULTI_LIST_LOCK_ASSERT();
 1848         IGMP_LOCK_ASSERT();
 1849 
 1850         query_response_timer_expired = 0;
 1851         state_change_retransmit_timer_expired = 0;
 1852 
 1853         /*
 1854          * During a transition from v1/v2 compatibility mode back to v3,
 1855          * a group record in REPORTING state may still have its group
 1856          * timer active. This is a no-op in this function; it is easier
 1857          * to deal with it here than to complicate the slow-timeout path.
 1858          */
 1859         if (inm->inm_timer == 0) {
 1860                 query_response_timer_expired = 0;
 1861         } else if (--inm->inm_timer == 0) {
 1862                 query_response_timer_expired = 1;
 1863         } else {
 1864                 V_current_state_timers_running = 1;
 1865         }
 1866 
 1867         if (inm->inm_sctimer == 0) {
 1868                 state_change_retransmit_timer_expired = 0;
 1869         } else if (--inm->inm_sctimer == 0) {
 1870                 state_change_retransmit_timer_expired = 1;
 1871         } else {
 1872                 V_state_change_timers_running = 1;
 1873         }
 1874 
 1875         /* We are in fasttimo, so be quick about it. */
 1876         if (!state_change_retransmit_timer_expired &&
 1877             !query_response_timer_expired)
 1878                 return;
 1879 
 1880         switch (inm->inm_state) {
 1881         case IGMP_NOT_MEMBER:
 1882         case IGMP_SILENT_MEMBER:
 1883         case IGMP_SLEEPING_MEMBER:
 1884         case IGMP_LAZY_MEMBER:
 1885         case IGMP_AWAKENING_MEMBER:
 1886         case IGMP_IDLE_MEMBER:
 1887                 break;
 1888         case IGMP_G_QUERY_PENDING_MEMBER:
 1889         case IGMP_SG_QUERY_PENDING_MEMBER:
 1890                 /*
 1891                  * Respond to a previously pending Group-Specific
 1892                  * or Group-and-Source-Specific query by enqueueing
 1893                  * the appropriate Current-State report for
 1894                  * immediate transmission.
 1895                  */
 1896                 if (query_response_timer_expired) {
 1897                         int retval __unused;
 1898 
 1899                         retval = igmp_v3_enqueue_group_record(qrq, inm, 0, 1,
 1900                             (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER));
 1901                         CTR2(KTR_IGMPV3, "%s: enqueue record = %d",
 1902                             __func__, retval);
 1903                         inm->inm_state = IGMP_REPORTING_MEMBER;
 1904                         /* XXX Clear recorded sources for next time. */
 1905                         inm_clear_recorded(inm);
 1906                 }
 1907                 /* FALLTHROUGH */
 1908         case IGMP_REPORTING_MEMBER:
 1909         case IGMP_LEAVING_MEMBER:
 1910                 if (state_change_retransmit_timer_expired) {
 1911                         /*
 1912                          * State-change retransmission timer fired.
 1913                          * If there are any further pending retransmissions,
 1914                          * set the global pending state-change flag, and
 1915                          * reset the timer.
 1916                          */
 1917                         if (--inm->inm_scrv > 0) {
 1918                                 inm->inm_sctimer = uri_fasthz;
 1919                                 V_state_change_timers_running = 1;
 1920                         }
 1921                         /*
 1922                          * Retransmit the previously computed state-change
 1923                          * report. If there are no further pending
 1924                          * retransmissions, the mbuf queue will be consumed.
 1925                          * Update T0 state to T1 as we have now sent
 1926                          * a state-change.
 1927                          */
 1928                         (void)igmp_v3_merge_state_changes(inm, scq);
 1929 
 1930                         inm_commit(inm);
 1931                         CTR3(KTR_IGMPV3, "%s: T1 -> T0 for 0x%08x/%s", __func__,
 1932                             ntohl(inm->inm_addr.s_addr),
 1933                             inm->inm_ifp->if_xname);
 1934 
 1935                         /*
 1936                          * If we are leaving the group for good, make sure
 1937                          * we release IGMP's reference to it.
 1938                          * This release must be deferred using a SLIST,
 1939                          * as we are called from a loop which traverses
 1940                          * the in_ifmultiaddr TAILQ.
 1941                          */
 1942                         if (inm->inm_state == IGMP_LEAVING_MEMBER &&
 1943                             inm->inm_scrv == 0) {
 1944                                 inm->inm_state = IGMP_NOT_MEMBER;
 1945                                 inm_rele_locked(inmh, inm);
 1946                         }
 1947                 }
 1948                 break;
 1949         }
 1950 }
 1951 
 1952 /*
 1953  * Suppress a group's pending response to a group or source/group query.
 1954  *
 1955  * Do NOT suppress state changes. This leads to IGMPv3 inconsistency.
 1956  * Do NOT update ST1/ST0 as this operation merely suppresses
 1957  * the currently pending group record.
 1958  * Do NOT suppress the response to a general query. It is possible but
 1959  * it would require adding another state or flag.
 1960  */
 1961 static void
 1962 igmp_v3_suppress_group_record(struct in_multi *inm)
 1963 {
 1964 
 1965         IN_MULTI_LIST_LOCK_ASSERT();
 1966 
 1967         KASSERT(inm->inm_igi->igi_version == IGMP_VERSION_3,
 1968                 ("%s: not IGMPv3 mode on link", __func__));
 1969 
 1970         if (inm->inm_state != IGMP_G_QUERY_PENDING_MEMBER ||
 1971             inm->inm_state != IGMP_SG_QUERY_PENDING_MEMBER)
 1972                 return;
 1973 
 1974         if (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER)
 1975                 inm_clear_recorded(inm);
 1976 
 1977         inm->inm_timer = 0;
 1978         inm->inm_state = IGMP_REPORTING_MEMBER;
 1979 }
 1980 
 1981 /*
 1982  * Switch to a different IGMP version on the given interface,
 1983  * as per Section 7.2.1.
 1984  */
 1985 static void
 1986 igmp_set_version(struct igmp_ifsoftc *igi, const int version)
 1987 {
 1988         int old_version_timer;
 1989 
 1990         IGMP_LOCK_ASSERT();
 1991 
 1992         CTR4(KTR_IGMPV3, "%s: switching to v%d on ifp %p(%s)", __func__,
 1993             version, igi->igi_ifp, igi->igi_ifp->if_xname);
 1994 
 1995         if (version == IGMP_VERSION_1 || version == IGMP_VERSION_2) {
 1996                 /*
 1997                  * Compute the "Older Version Querier Present" timer as per
 1998                  * Section 8.12.
 1999                  */
 2000                 old_version_timer = igi->igi_rv * igi->igi_qi + igi->igi_qri;
 2001                 old_version_timer *= IGMP_SLOWHZ;
 2002 
 2003                 if (version == IGMP_VERSION_1) {
 2004                         igi->igi_v1_timer = old_version_timer;
 2005                         igi->igi_v2_timer = 0;
 2006                 } else if (version == IGMP_VERSION_2) {
 2007                         igi->igi_v1_timer = 0;
 2008                         igi->igi_v2_timer = old_version_timer;
 2009                 }
 2010         }
 2011 
 2012         if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
 2013                 if (igi->igi_version != IGMP_VERSION_2) {
 2014                         igi->igi_version = IGMP_VERSION_2;
 2015                         igmp_v3_cancel_link_timers(igi);
 2016                 }
 2017         } else if (igi->igi_v1_timer > 0) {
 2018                 if (igi->igi_version != IGMP_VERSION_1) {
 2019                         igi->igi_version = IGMP_VERSION_1;
 2020                         igmp_v3_cancel_link_timers(igi);
 2021                 }
 2022         }
 2023 }
 2024 
 2025 /*
 2026  * Cancel pending IGMPv3 timers for the given link and all groups
 2027  * joined on it; state-change, general-query, and group-query timers.
 2028  *
 2029  * Only ever called on a transition from v3 to Compatibility mode. Kill
 2030  * the timers stone dead (this may be expensive for large N groups), they
 2031  * will be restarted if Compatibility Mode deems that they must be due to
 2032  * query processing.
 2033  */
 2034 static void
 2035 igmp_v3_cancel_link_timers(struct igmp_ifsoftc *igi)
 2036 {
 2037         struct ifmultiaddr      *ifma;
 2038         struct ifnet            *ifp;
 2039         struct in_multi         *inm;
 2040         struct in_multi_head inm_free_tmp;
 2041 
 2042         CTR3(KTR_IGMPV3, "%s: cancel v3 timers on ifp %p(%s)", __func__,
 2043             igi->igi_ifp, igi->igi_ifp->if_xname);
 2044 
 2045         IN_MULTI_LIST_LOCK_ASSERT();
 2046         IGMP_LOCK_ASSERT();
 2047         NET_EPOCH_ASSERT();
 2048 
 2049         SLIST_INIT(&inm_free_tmp);
 2050 
 2051         /*
 2052          * Stop the v3 General Query Response on this link stone dead.
 2053          * If fasttimo is woken up due to V_interface_timers_running,
 2054          * the flag will be cleared if there are no pending link timers.
 2055          */
 2056         igi->igi_v3_timer = 0;
 2057 
 2058         /*
 2059          * Now clear the current-state and state-change report timers
 2060          * for all memberships scoped to this link.
 2061          */
 2062         ifp = igi->igi_ifp;
 2063         IF_ADDR_WLOCK(ifp);
 2064         CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 2065                 inm = inm_ifmultiaddr_get_inm(ifma);
 2066                 if (inm == NULL)
 2067                         continue;
 2068                 switch (inm->inm_state) {
 2069                 case IGMP_NOT_MEMBER:
 2070                 case IGMP_SILENT_MEMBER:
 2071                 case IGMP_IDLE_MEMBER:
 2072                 case IGMP_LAZY_MEMBER:
 2073                 case IGMP_SLEEPING_MEMBER:
 2074                 case IGMP_AWAKENING_MEMBER:
 2075                         /*
 2076                          * These states are either not relevant in v3 mode,
 2077                          * or are unreported. Do nothing.
 2078                          */
 2079                         break;
 2080                 case IGMP_LEAVING_MEMBER:
 2081                         /*
 2082                          * If we are leaving the group and switching to
 2083                          * compatibility mode, we need to release the final
 2084                          * reference held for issuing the INCLUDE {}, and
 2085                          * transition to REPORTING to ensure the host leave
 2086                          * message is sent upstream to the old querier --
 2087                          * transition to NOT would lose the leave and race.
 2088                          */
 2089                         inm_rele_locked(&inm_free_tmp, inm);
 2090                         /* FALLTHROUGH */
 2091                 case IGMP_G_QUERY_PENDING_MEMBER:
 2092                 case IGMP_SG_QUERY_PENDING_MEMBER:
 2093                         inm_clear_recorded(inm);
 2094                         /* FALLTHROUGH */
 2095                 case IGMP_REPORTING_MEMBER:
 2096                         inm->inm_state = IGMP_REPORTING_MEMBER;
 2097                         break;
 2098                 }
 2099                 /*
 2100                  * Always clear state-change and group report timers.
 2101                  * Free any pending IGMPv3 state-change records.
 2102                  */
 2103                 inm->inm_sctimer = 0;
 2104                 inm->inm_timer = 0;
 2105                 mbufq_drain(&inm->inm_scq);
 2106         }
 2107         IF_ADDR_WUNLOCK(ifp);
 2108 
 2109         inm_release_list_deferred(&inm_free_tmp);
 2110 }
 2111 
 2112 /*
 2113  * Update the Older Version Querier Present timers for a link.
 2114  * See Section 7.2.1 of RFC 3376.
 2115  */
 2116 static void
 2117 igmp_v1v2_process_querier_timers(struct igmp_ifsoftc *igi)
 2118 {
 2119 
 2120         IGMP_LOCK_ASSERT();
 2121 
 2122         if (igi->igi_v1_timer == 0 && igi->igi_v2_timer == 0) {
 2123                 /*
 2124                  * IGMPv1 and IGMPv2 Querier Present timers expired.
 2125                  *
 2126                  * Revert to IGMPv3.
 2127                  */
 2128                 if (igi->igi_version != IGMP_VERSION_3) {
 2129                         CTR5(KTR_IGMPV3,
 2130                             "%s: transition from v%d -> v%d on %p(%s)",
 2131                             __func__, igi->igi_version, IGMP_VERSION_3,
 2132                             igi->igi_ifp, igi->igi_ifp->if_xname);
 2133                         igi->igi_version = IGMP_VERSION_3;
 2134                 }
 2135         } else if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
 2136                 /*
 2137                  * IGMPv1 Querier Present timer expired,
 2138                  * IGMPv2 Querier Present timer running.
 2139                  * If IGMPv2 was disabled since last timeout,
 2140                  * revert to IGMPv3.
 2141                  * If IGMPv2 is enabled, revert to IGMPv2.
 2142                  */
 2143                 if (!V_igmp_v2enable) {
 2144                         CTR5(KTR_IGMPV3,
 2145                             "%s: transition from v%d -> v%d on %p(%s)",
 2146                             __func__, igi->igi_version, IGMP_VERSION_3,
 2147                             igi->igi_ifp, igi->igi_ifp->if_xname);
 2148                         igi->igi_v2_timer = 0;
 2149                         igi->igi_version = IGMP_VERSION_3;
 2150                 } else {
 2151                         --igi->igi_v2_timer;
 2152                         if (igi->igi_version != IGMP_VERSION_2) {
 2153                                 CTR5(KTR_IGMPV3,
 2154                                     "%s: transition from v%d -> v%d on %p(%s)",
 2155                                     __func__, igi->igi_version, IGMP_VERSION_2,
 2156                                     igi->igi_ifp, igi->igi_ifp->if_xname);
 2157                                 igi->igi_version = IGMP_VERSION_2;
 2158                                 igmp_v3_cancel_link_timers(igi);
 2159                         }
 2160                 }
 2161         } else if (igi->igi_v1_timer > 0) {
 2162                 /*
 2163                  * IGMPv1 Querier Present timer running.
 2164                  * Stop IGMPv2 timer if running.
 2165                  *
 2166                  * If IGMPv1 was disabled since last timeout,
 2167                  * revert to IGMPv3.
 2168                  * If IGMPv1 is enabled, reset IGMPv2 timer if running.
 2169                  */
 2170                 if (!V_igmp_v1enable) {
 2171                         CTR5(KTR_IGMPV3,
 2172                             "%s: transition from v%d -> v%d on %p(%s)",
 2173                             __func__, igi->igi_version, IGMP_VERSION_3,
 2174                             igi->igi_ifp, igi->igi_ifp->if_xname);
 2175                         igi->igi_v1_timer = 0;
 2176                         igi->igi_version = IGMP_VERSION_3;
 2177                 } else {
 2178                         --igi->igi_v1_timer;
 2179                 }
 2180                 if (igi->igi_v2_timer > 0) {
 2181                         CTR3(KTR_IGMPV3,
 2182                             "%s: cancel v2 timer on %p(%s)",
 2183                             __func__, igi->igi_ifp, igi->igi_ifp->if_xname);
 2184                         igi->igi_v2_timer = 0;
 2185                 }
 2186         }
 2187 }
 2188 
 2189 /*
 2190  * Global slowtimo handler.
 2191  * VIMAGE: Timeout handlers are expected to service all vimages.
 2192  */
 2193 static struct callout igmpslow_callout;
 2194 static void
 2195 igmp_slowtimo(void *arg __unused)
 2196 {
 2197         struct epoch_tracker et;
 2198         VNET_ITERATOR_DECL(vnet_iter);
 2199 
 2200         NET_EPOCH_ENTER(et);
 2201         VNET_LIST_RLOCK_NOSLEEP();
 2202         VNET_FOREACH(vnet_iter) {
 2203                 CURVNET_SET(vnet_iter);
 2204                 igmp_slowtimo_vnet();
 2205                 CURVNET_RESTORE();
 2206         }
 2207         VNET_LIST_RUNLOCK_NOSLEEP();
 2208         NET_EPOCH_EXIT(et);
 2209 
 2210         callout_reset(&igmpslow_callout, hz / IGMP_SLOWHZ, igmp_slowtimo, NULL);
 2211 }
 2212 
 2213 /*
 2214  * Per-vnet slowtimo handler.
 2215  */
 2216 static void
 2217 igmp_slowtimo_vnet(void)
 2218 {
 2219         struct igmp_ifsoftc *igi;
 2220 
 2221         IGMP_LOCK();
 2222 
 2223         LIST_FOREACH(igi, &V_igi_head, igi_link) {
 2224                 igmp_v1v2_process_querier_timers(igi);
 2225         }
 2226 
 2227         IGMP_UNLOCK();
 2228 }
 2229 
 2230 /*
 2231  * Dispatch an IGMPv1/v2 host report or leave message.
 2232  * These are always small enough to fit inside a single mbuf.
 2233  */
 2234 static int
 2235 igmp_v1v2_queue_report(struct in_multi *inm, const int type)
 2236 {
 2237         struct epoch_tracker    et;
 2238         struct ifnet            *ifp;
 2239         struct igmp             *igmp;
 2240         struct ip               *ip;
 2241         struct mbuf             *m;
 2242 
 2243         IN_MULTI_LIST_LOCK_ASSERT();
 2244         IGMP_LOCK_ASSERT();
 2245 
 2246         ifp = inm->inm_ifp;
 2247 
 2248         m = m_gethdr(M_NOWAIT, MT_DATA);
 2249         if (m == NULL)
 2250                 return (ENOMEM);
 2251         M_ALIGN(m, sizeof(struct ip) + sizeof(struct igmp));
 2252 
 2253         m->m_pkthdr.len = sizeof(struct ip) + sizeof(struct igmp);
 2254 
 2255         m->m_data += sizeof(struct ip);
 2256         m->m_len = sizeof(struct igmp);
 2257 
 2258         igmp = mtod(m, struct igmp *);
 2259         igmp->igmp_type = type;
 2260         igmp->igmp_code = 0;
 2261         igmp->igmp_group = inm->inm_addr;
 2262         igmp->igmp_cksum = 0;
 2263         igmp->igmp_cksum = in_cksum(m, sizeof(struct igmp));
 2264 
 2265         m->m_data -= sizeof(struct ip);
 2266         m->m_len += sizeof(struct ip);
 2267 
 2268         ip = mtod(m, struct ip *);
 2269         ip->ip_tos = 0;
 2270         ip->ip_len = htons(sizeof(struct ip) + sizeof(struct igmp));
 2271         ip->ip_off = 0;
 2272         ip->ip_p = IPPROTO_IGMP;
 2273         ip->ip_src.s_addr = INADDR_ANY;
 2274 
 2275         if (type == IGMP_HOST_LEAVE_MESSAGE)
 2276                 ip->ip_dst.s_addr = htonl(INADDR_ALLRTRS_GROUP);
 2277         else
 2278                 ip->ip_dst = inm->inm_addr;
 2279 
 2280         igmp_save_context(m, ifp);
 2281 
 2282         m->m_flags |= M_IGMPV2;
 2283         if (inm->inm_igi->igi_flags & IGIF_LOOPBACK)
 2284                 m->m_flags |= M_IGMP_LOOP;
 2285 
 2286         CTR2(KTR_IGMPV3, "%s: netisr_dispatch(NETISR_IGMP, %p)", __func__, m);
 2287         NET_EPOCH_ENTER(et);
 2288         netisr_dispatch(NETISR_IGMP, m);
 2289         NET_EPOCH_EXIT(et);
 2290 
 2291         return (0);
 2292 }
 2293 
 2294 /*
 2295  * Process a state change from the upper layer for the given IPv4 group.
 2296  *
 2297  * Each socket holds a reference on the in_multi in its own ip_moptions.
 2298  * The socket layer will have made the necessary updates to the group
 2299  * state, it is now up to IGMP to issue a state change report if there
 2300  * has been any change between T0 (when the last state-change was issued)
 2301  * and T1 (now).
 2302  *
 2303  * We use the IGMPv3 state machine at group level. The IGMP module
 2304  * however makes the decision as to which IGMP protocol version to speak.
 2305  * A state change *from* INCLUDE {} always means an initial join.
 2306  * A state change *to* INCLUDE {} always means a final leave.
 2307  *
 2308  * FUTURE: If IGIF_V3LITE is enabled for this interface, then we can
 2309  * save ourselves a bunch of work; any exclusive mode groups need not
 2310  * compute source filter lists.
 2311  *
 2312  * VIMAGE: curvnet should have been set by caller, as this routine
 2313  * is called from the socket option handlers.
 2314  */
 2315 int
 2316 igmp_change_state(struct in_multi *inm)
 2317 {
 2318         struct igmp_ifsoftc *igi;
 2319         struct ifnet *ifp;
 2320         int error;
 2321 
 2322         error = 0;
 2323         IN_MULTI_LOCK_ASSERT();
 2324         /*
 2325          * Try to detect if the upper layer just asked us to change state
 2326          * for an interface which has now gone away.
 2327          */
 2328         KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));
 2329         ifp = inm->inm_ifma->ifma_ifp;
 2330         if (ifp == NULL)
 2331                 return (0);
 2332         /*
 2333          * Sanity check that netinet's notion of ifp is the
 2334          * same as net's.
 2335          */
 2336         KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__));
 2337 
 2338         IGMP_LOCK();
 2339 
 2340         igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
 2341         KASSERT(igi != NULL, ("%s: no igmp_ifsoftc for ifp %p", __func__, ifp));
 2342 
 2343         /*
 2344          * If we detect a state transition to or from MCAST_UNDEFINED
 2345          * for this group, then we are starting or finishing an IGMP
 2346          * life cycle for this group.
 2347          */
 2348         if (inm->inm_st[1].iss_fmode != inm->inm_st[0].iss_fmode) {
 2349                 CTR3(KTR_IGMPV3, "%s: inm transition %d -> %d", __func__,
 2350                     inm->inm_st[0].iss_fmode, inm->inm_st[1].iss_fmode);
 2351                 if (inm->inm_st[0].iss_fmode == MCAST_UNDEFINED) {
 2352                         CTR1(KTR_IGMPV3, "%s: initial join", __func__);
 2353                         error = igmp_initial_join(inm, igi);
 2354                         goto out_locked;
 2355                 } else if (inm->inm_st[1].iss_fmode == MCAST_UNDEFINED) {
 2356                         CTR1(KTR_IGMPV3, "%s: final leave", __func__);
 2357                         igmp_final_leave(inm, igi);
 2358                         goto out_locked;
 2359                 }
 2360         } else {
 2361                 CTR1(KTR_IGMPV3, "%s: filter set change", __func__);
 2362         }
 2363 
 2364         error = igmp_handle_state_change(inm, igi);
 2365 
 2366 out_locked:
 2367         IGMP_UNLOCK();
 2368         return (error);
 2369 }
 2370 
 2371 /*
 2372  * Perform the initial join for an IGMP group.
 2373  *
 2374  * When joining a group:
 2375  *  If the group should have its IGMP traffic suppressed, do nothing.
 2376  *  IGMPv1 starts sending IGMPv1 host membership reports.
 2377  *  IGMPv2 starts sending IGMPv2 host membership reports.
 2378  *  IGMPv3 will schedule an IGMPv3 state-change report containing the
 2379  *  initial state of the membership.
 2380  */
 2381 static int
 2382 igmp_initial_join(struct in_multi *inm, struct igmp_ifsoftc *igi)
 2383 {
 2384         struct ifnet            *ifp;
 2385         struct mbufq            *mq;
 2386         int                      error, retval, syncstates;
 2387 
 2388         CTR4(KTR_IGMPV3, "%s: initial join 0x%08x on ifp %p(%s)", __func__,
 2389             ntohl(inm->inm_addr.s_addr), inm->inm_ifp, inm->inm_ifp->if_xname);
 2390 
 2391         error = 0;
 2392         syncstates = 1;
 2393 
 2394         ifp = inm->inm_ifp;
 2395 
 2396         IN_MULTI_LOCK_ASSERT();
 2397         IGMP_LOCK_ASSERT();
 2398 
 2399         KASSERT(igi && igi->igi_ifp == ifp, ("%s: inconsistent ifp", __func__));
 2400 
 2401         /*
 2402          * Groups joined on loopback or marked as 'not reported',
 2403          * e.g. 224.0.0.1, enter the IGMP_SILENT_MEMBER state and
 2404          * are never reported in any IGMP protocol exchanges.
 2405          * All other groups enter the appropriate IGMP state machine
 2406          * for the version in use on this link.
 2407          * A link marked as IGIF_SILENT causes IGMP to be completely
 2408          * disabled for the link.
 2409          */
 2410         if ((ifp->if_flags & IFF_LOOPBACK) ||
 2411             (igi->igi_flags & IGIF_SILENT) ||
 2412             !igmp_isgroupreported(inm->inm_addr)) {
 2413                 CTR1(KTR_IGMPV3,
 2414 "%s: not kicking state machine for silent group", __func__);
 2415                 inm->inm_state = IGMP_SILENT_MEMBER;
 2416                 inm->inm_timer = 0;
 2417         } else {
 2418                 /*
 2419                  * Deal with overlapping in_multi lifecycle.
 2420                  * If this group was LEAVING, then make sure
 2421                  * we drop the reference we picked up to keep the
 2422                  * group around for the final INCLUDE {} enqueue.
 2423                  */
 2424                 if (igi->igi_version == IGMP_VERSION_3 &&
 2425                     inm->inm_state == IGMP_LEAVING_MEMBER) {
 2426                         MPASS(inm->inm_refcount > 1);
 2427                         inm_rele_locked(NULL, inm);
 2428                 }
 2429                 inm->inm_state = IGMP_REPORTING_MEMBER;
 2430 
 2431                 switch (igi->igi_version) {
 2432                 case IGMP_VERSION_1:
 2433                 case IGMP_VERSION_2:
 2434                         inm->inm_state = IGMP_IDLE_MEMBER;
 2435                         error = igmp_v1v2_queue_report(inm,
 2436                             (igi->igi_version == IGMP_VERSION_2) ?
 2437                              IGMP_v2_HOST_MEMBERSHIP_REPORT :
 2438                              IGMP_v1_HOST_MEMBERSHIP_REPORT);
 2439                         if (error == 0) {
 2440                                 inm->inm_timer = IGMP_RANDOM_DELAY(
 2441                                     IGMP_V1V2_MAX_RI * IGMP_FASTHZ);
 2442                                 V_current_state_timers_running = 1;
 2443                         }
 2444                         break;
 2445 
 2446                 case IGMP_VERSION_3:
 2447                         /*
 2448                          * Defer update of T0 to T1, until the first copy
 2449                          * of the state change has been transmitted.
 2450                          */
 2451                         syncstates = 0;
 2452 
 2453                         /*
 2454                          * Immediately enqueue a State-Change Report for
 2455                          * this interface, freeing any previous reports.
 2456                          * Don't kick the timers if there is nothing to do,
 2457                          * or if an error occurred.
 2458                          */
 2459                         mq = &inm->inm_scq;
 2460                         mbufq_drain(mq);
 2461                         retval = igmp_v3_enqueue_group_record(mq, inm, 1,
 2462                             0, 0);
 2463                         CTR2(KTR_IGMPV3, "%s: enqueue record = %d",
 2464                             __func__, retval);
 2465                         if (retval <= 0) {
 2466                                 error = retval * -1;
 2467                                 break;
 2468                         }
 2469 
 2470                         /*
 2471                          * Schedule transmission of pending state-change
 2472                          * report up to RV times for this link. The timer
 2473                          * will fire at the next igmp_fasttimo (~200ms),
 2474                          * giving us an opportunity to merge the reports.
 2475                          */
 2476                         if (igi->igi_flags & IGIF_LOOPBACK) {
 2477                                 inm->inm_scrv = 1;
 2478                         } else {
 2479                                 KASSERT(igi->igi_rv > 1,
 2480                                    ("%s: invalid robustness %d", __func__,
 2481                                     igi->igi_rv));
 2482                                 inm->inm_scrv = igi->igi_rv;
 2483                         }
 2484                         inm->inm_sctimer = 1;
 2485                         V_state_change_timers_running = 1;
 2486 
 2487                         error = 0;
 2488                         break;
 2489                 }
 2490         }
 2491 
 2492         /*
 2493          * Only update the T0 state if state change is atomic,
 2494          * i.e. we don't need to wait for a timer to fire before we
 2495          * can consider the state change to have been communicated.
 2496          */
 2497         if (syncstates) {
 2498                 inm_commit(inm);
 2499                 CTR3(KTR_IGMPV3, "%s: T1 -> T0 for 0x%08x/%s", __func__,
 2500                     ntohl(inm->inm_addr.s_addr), inm->inm_ifp->if_xname);
 2501         }
 2502 
 2503         return (error);
 2504 }
 2505 
 2506 /*
 2507  * Issue an intermediate state change during the IGMP life-cycle.
 2508  */
 2509 static int
 2510 igmp_handle_state_change(struct in_multi *inm, struct igmp_ifsoftc *igi)
 2511 {
 2512         struct ifnet            *ifp;
 2513         int                      retval;
 2514 
 2515         CTR4(KTR_IGMPV3, "%s: state change for 0x%08x on ifp %p(%s)", __func__,
 2516             ntohl(inm->inm_addr.s_addr), inm->inm_ifp, inm->inm_ifp->if_xname);
 2517 
 2518         ifp = inm->inm_ifp;
 2519 
 2520         IN_MULTI_LIST_LOCK_ASSERT();
 2521         IGMP_LOCK_ASSERT();
 2522 
 2523         KASSERT(igi && igi->igi_ifp == ifp, ("%s: inconsistent ifp", __func__));
 2524 
 2525         if ((ifp->if_flags & IFF_LOOPBACK) ||
 2526             (igi->igi_flags & IGIF_SILENT) ||
 2527             !igmp_isgroupreported(inm->inm_addr) ||
 2528             (igi->igi_version != IGMP_VERSION_3)) {
 2529                 if (!igmp_isgroupreported(inm->inm_addr)) {
 2530                         CTR1(KTR_IGMPV3,
 2531 "%s: not kicking state machine for silent group", __func__);
 2532                 }
 2533                 CTR1(KTR_IGMPV3, "%s: nothing to do", __func__);
 2534                 inm_commit(inm);
 2535                 CTR3(KTR_IGMPV3, "%s: T1 -> T0 for 0x%08x/%s", __func__,
 2536                     ntohl(inm->inm_addr.s_addr), inm->inm_ifp->if_xname);
 2537                 return (0);
 2538         }
 2539 
 2540         mbufq_drain(&inm->inm_scq);
 2541 
 2542         retval = igmp_v3_enqueue_group_record(&inm->inm_scq, inm, 1, 0, 0);
 2543         CTR2(KTR_IGMPV3, "%s: enqueue record = %d", __func__, retval);
 2544         if (retval <= 0)
 2545                 return (-retval);
 2546 
 2547         /*
 2548          * If record(s) were enqueued, start the state-change
 2549          * report timer for this group.
 2550          */
 2551         inm->inm_scrv = ((igi->igi_flags & IGIF_LOOPBACK) ? 1 : igi->igi_rv);
 2552         inm->inm_sctimer = 1;
 2553         V_state_change_timers_running = 1;
 2554 
 2555         return (0);
 2556 }
 2557 
 2558 /*
 2559  * Perform the final leave for an IGMP group.
 2560  *
 2561  * When leaving a group:
 2562  *  IGMPv1 does nothing.
 2563  *  IGMPv2 sends a host leave message, if and only if we are the reporter.
 2564  *  IGMPv3 enqueues a state-change report containing a transition
 2565  *  to INCLUDE {} for immediate transmission.
 2566  */
 2567 static void
 2568 igmp_final_leave(struct in_multi *inm, struct igmp_ifsoftc *igi)
 2569 {
 2570         int syncstates;
 2571 
 2572         syncstates = 1;
 2573 
 2574         CTR4(KTR_IGMPV3, "%s: final leave 0x%08x on ifp %p(%s)",
 2575             __func__, ntohl(inm->inm_addr.s_addr), inm->inm_ifp,
 2576             inm->inm_ifp->if_xname);
 2577 
 2578         IN_MULTI_LIST_LOCK_ASSERT();
 2579         IGMP_LOCK_ASSERT();
 2580 
 2581         switch (inm->inm_state) {
 2582         case IGMP_NOT_MEMBER:
 2583         case IGMP_SILENT_MEMBER:
 2584         case IGMP_LEAVING_MEMBER:
 2585                 /* Already leaving or left; do nothing. */
 2586                 CTR1(KTR_IGMPV3,
 2587 "%s: not kicking state machine for silent group", __func__);
 2588                 break;
 2589         case IGMP_REPORTING_MEMBER:
 2590         case IGMP_IDLE_MEMBER:
 2591         case IGMP_G_QUERY_PENDING_MEMBER:
 2592         case IGMP_SG_QUERY_PENDING_MEMBER:
 2593                 if (igi->igi_version == IGMP_VERSION_2) {
 2594 #ifdef INVARIANTS
 2595                         if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
 2596                             inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER)
 2597                         panic("%s: IGMPv3 state reached, not IGMPv3 mode",
 2598                              __func__);
 2599 #endif
 2600                         igmp_v1v2_queue_report(inm, IGMP_HOST_LEAVE_MESSAGE);
 2601                         inm->inm_state = IGMP_NOT_MEMBER;
 2602                 } else if (igi->igi_version == IGMP_VERSION_3) {
 2603                         /*
 2604                          * Stop group timer and all pending reports.
 2605                          * Immediately enqueue a state-change report
 2606                          * TO_IN {} to be sent on the next fast timeout,
 2607                          * giving us an opportunity to merge reports.
 2608                          */
 2609                         mbufq_drain(&inm->inm_scq);
 2610                         inm->inm_timer = 0;
 2611                         if (igi->igi_flags & IGIF_LOOPBACK) {
 2612                                 inm->inm_scrv = 1;
 2613                         } else {
 2614                                 inm->inm_scrv = igi->igi_rv;
 2615                         }
 2616                         CTR4(KTR_IGMPV3, "%s: Leaving 0x%08x/%s with %d "
 2617                             "pending retransmissions.", __func__,
 2618                             ntohl(inm->inm_addr.s_addr),
 2619                             inm->inm_ifp->if_xname, inm->inm_scrv);
 2620                         if (inm->inm_scrv == 0) {
 2621                                 inm->inm_state = IGMP_NOT_MEMBER;
 2622                                 inm->inm_sctimer = 0;
 2623                         } else {
 2624                                 int retval __unused;
 2625 
 2626                                 inm_acquire_locked(inm);
 2627 
 2628                                 retval = igmp_v3_enqueue_group_record(
 2629                                     &inm->inm_scq, inm, 1, 0, 0);
 2630                                 KASSERT(retval != 0,
 2631                                     ("%s: enqueue record = %d", __func__,
 2632                                      retval));
 2633 
 2634                                 inm->inm_state = IGMP_LEAVING_MEMBER;
 2635                                 inm->inm_sctimer = 1;
 2636                                 V_state_change_timers_running = 1;
 2637                                 syncstates = 0;
 2638                         }
 2639                         break;
 2640                 }
 2641                 break;
 2642         case IGMP_LAZY_MEMBER:
 2643         case IGMP_SLEEPING_MEMBER:
 2644         case IGMP_AWAKENING_MEMBER:
 2645                 /* Our reports are suppressed; do nothing. */
 2646                 break;
 2647         }
 2648 
 2649         if (syncstates) {
 2650                 inm_commit(inm);
 2651                 CTR3(KTR_IGMPV3, "%s: T1 -> T0 for 0x%08x/%s", __func__,
 2652                     ntohl(inm->inm_addr.s_addr), inm->inm_ifp->if_xname);
 2653                 inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
 2654                 CTR3(KTR_IGMPV3, "%s: T1 now MCAST_UNDEFINED for 0x%08x/%s",
 2655                     __func__, ntohl(inm->inm_addr.s_addr),
 2656                     inm->inm_ifp->if_xname);
 2657         }
 2658 }
 2659 
 2660 /*
 2661  * Enqueue an IGMPv3 group record to the given output queue.
 2662  *
 2663  * XXX This function could do with having the allocation code
 2664  * split out, and the multiple-tree-walks coalesced into a single
 2665  * routine as has been done in igmp_v3_enqueue_filter_change().
 2666  *
 2667  * If is_state_change is zero, a current-state record is appended.
 2668  * If is_state_change is non-zero, a state-change report is appended.
 2669  *
 2670  * If is_group_query is non-zero, an mbuf packet chain is allocated.
 2671  * If is_group_query is zero, and if there is a packet with free space
 2672  * at the tail of the queue, it will be appended to providing there
 2673  * is enough free space.
 2674  * Otherwise a new mbuf packet chain is allocated.
 2675  *
 2676  * If is_source_query is non-zero, each source is checked to see if
 2677  * it was recorded for a Group-Source query, and will be omitted if
 2678  * it is not both in-mode and recorded.
 2679  *
 2680  * The function will attempt to allocate leading space in the packet
 2681  * for the IP/IGMP header to be prepended without fragmenting the chain.
 2682  *
 2683  * If successful the size of all data appended to the queue is returned,
 2684  * otherwise an error code less than zero is returned, or zero if
 2685  * no record(s) were appended.
 2686  */
 2687 static int
 2688 igmp_v3_enqueue_group_record(struct mbufq *mq, struct in_multi *inm,
 2689     const int is_state_change, const int is_group_query,
 2690     const int is_source_query)
 2691 {
 2692         struct igmp_grouprec     ig;
 2693         struct igmp_grouprec    *pig;
 2694         struct ifnet            *ifp;
 2695         struct ip_msource       *ims, *nims;
 2696         struct mbuf             *m0, *m, *md;
 2697         int                      is_filter_list_change;
 2698         int                      minrec0len, m0srcs, msrcs, nbytes, off;
 2699         int                      record_has_sources;
 2700         int                      now;
 2701         int                      type;
 2702         in_addr_t                naddr;
 2703         uint8_t                  mode;
 2704 
 2705         IN_MULTI_LIST_LOCK_ASSERT();
 2706 
 2707         ifp = inm->inm_ifp;
 2708         is_filter_list_change = 0;
 2709         m = NULL;
 2710         m0 = NULL;
 2711         m0srcs = 0;
 2712         msrcs = 0;
 2713         nbytes = 0;
 2714         nims = NULL;
 2715         record_has_sources = 1;
 2716         pig = NULL;
 2717         type = IGMP_DO_NOTHING;
 2718         mode = inm->inm_st[1].iss_fmode;
 2719 
 2720         /*
 2721          * If we did not transition out of ASM mode during t0->t1,
 2722          * and there are no source nodes to process, we can skip
 2723          * the generation of source records.
 2724          */
 2725         if (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0 &&
 2726             inm->inm_nsrc == 0)
 2727                 record_has_sources = 0;
 2728 
 2729         if (is_state_change) {
 2730                 /*
 2731                  * Queue a state change record.
 2732                  * If the mode did not change, and there are non-ASM
 2733                  * listeners or source filters present,
 2734                  * we potentially need to issue two records for the group.
 2735                  * If we are transitioning to MCAST_UNDEFINED, we need
 2736                  * not send any sources.
 2737                  * If there are ASM listeners, and there was no filter
 2738                  * mode transition of any kind, do nothing.
 2739                  */
 2740                 if (mode != inm->inm_st[0].iss_fmode) {
 2741                         if (mode == MCAST_EXCLUDE) {
 2742                                 CTR1(KTR_IGMPV3, "%s: change to EXCLUDE",
 2743                                     __func__);
 2744                                 type = IGMP_CHANGE_TO_EXCLUDE_MODE;
 2745                         } else {
 2746                                 CTR1(KTR_IGMPV3, "%s: change to INCLUDE",
 2747                                     __func__);
 2748                                 type = IGMP_CHANGE_TO_INCLUDE_MODE;
 2749                                 if (mode == MCAST_UNDEFINED)
 2750                                         record_has_sources = 0;
 2751                         }
 2752                 } else {
 2753                         if (record_has_sources) {
 2754                                 is_filter_list_change = 1;
 2755                         } else {
 2756                                 type = IGMP_DO_NOTHING;
 2757                         }
 2758                 }
 2759         } else {
 2760                 /*
 2761                  * Queue a current state record.
 2762                  */
 2763                 if (mode == MCAST_EXCLUDE) {
 2764                         type = IGMP_MODE_IS_EXCLUDE;
 2765                 } else if (mode == MCAST_INCLUDE) {
 2766                         type = IGMP_MODE_IS_INCLUDE;
 2767                         KASSERT(inm->inm_st[1].iss_asm == 0,
 2768                             ("%s: inm %p is INCLUDE but ASM count is %d",
 2769                              __func__, inm, inm->inm_st[1].iss_asm));
 2770                 }
 2771         }
 2772 
 2773         /*
 2774          * Generate the filter list changes using a separate function.
 2775          */
 2776         if (is_filter_list_change)
 2777                 return (igmp_v3_enqueue_filter_change(mq, inm));
 2778 
 2779         if (type == IGMP_DO_NOTHING) {
 2780                 CTR3(KTR_IGMPV3, "%s: nothing to do for 0x%08x/%s", __func__,
 2781                     ntohl(inm->inm_addr.s_addr), inm->inm_ifp->if_xname);
 2782                 return (0);
 2783         }
 2784 
 2785         /*
 2786          * If any sources are present, we must be able to fit at least
 2787          * one in the trailing space of the tail packet's mbuf,
 2788          * ideally more.
 2789          */
 2790         minrec0len = sizeof(struct igmp_grouprec);
 2791         if (record_has_sources)
 2792                 minrec0len += sizeof(in_addr_t);
 2793 
 2794         CTR4(KTR_IGMPV3, "%s: queueing %s for 0x%08x/%s", __func__,
 2795             igmp_rec_type_to_str(type), ntohl(inm->inm_addr.s_addr),
 2796             inm->inm_ifp->if_xname);
 2797 
 2798         /*
 2799          * Check if we have a packet in the tail of the queue for this
 2800          * group into which the first group record for this group will fit.
 2801          * Otherwise allocate a new packet.
 2802          * Always allocate leading space for IP+RA_OPT+IGMP+REPORT.
 2803          * Note: Group records for G/GSR query responses MUST be sent
 2804          * in their own packet.
 2805          */
 2806         m0 = mbufq_last(mq);
 2807         if (!is_group_query &&
 2808             m0 != NULL &&
 2809             (m0->m_pkthdr.vt_nrecs + 1 <= IGMP_V3_REPORT_MAXRECS) &&
 2810             (m0->m_pkthdr.len + minrec0len) <
 2811              (ifp->if_mtu - IGMP_LEADINGSPACE)) {
 2812                 m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
 2813                             sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
 2814                 m = m0;
 2815                 CTR1(KTR_IGMPV3, "%s: use existing packet", __func__);
 2816         } else {
 2817                 if (mbufq_full(mq)) {
 2818                         CTR1(KTR_IGMPV3, "%s: outbound queue full", __func__);
 2819                         return (-ENOMEM);
 2820                 }
 2821                 m = NULL;
 2822                 m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
 2823                     sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
 2824                 if (!is_state_change && !is_group_query) {
 2825                         m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 2826                         if (m)
 2827                                 m->m_data += IGMP_LEADINGSPACE;
 2828                 }
 2829                 if (m == NULL) {
 2830                         m = m_gethdr(M_NOWAIT, MT_DATA);
 2831                         if (m)
 2832                                 M_ALIGN(m, IGMP_LEADINGSPACE);
 2833                 }
 2834                 if (m == NULL)
 2835                         return (-ENOMEM);
 2836 
 2837                 igmp_save_context(m, ifp);
 2838 
 2839                 CTR1(KTR_IGMPV3, "%s: allocated first packet", __func__);
 2840         }
 2841 
 2842         /*
 2843          * Append group record.
 2844          * If we have sources, we don't know how many yet.
 2845          */
 2846         ig.ig_type = type;
 2847         ig.ig_datalen = 0;
 2848         ig.ig_numsrc = 0;
 2849         ig.ig_group = inm->inm_addr;
 2850         if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
 2851                 if (m != m0)
 2852                         m_freem(m);
 2853                 CTR1(KTR_IGMPV3, "%s: m_append() failed.", __func__);
 2854                 return (-ENOMEM);
 2855         }
 2856         nbytes += sizeof(struct igmp_grouprec);
 2857 
 2858         /*
 2859          * Append as many sources as will fit in the first packet.
 2860          * If we are appending to a new packet, the chain allocation
 2861          * may potentially use clusters; use m_getptr() in this case.
 2862          * If we are appending to an existing packet, we need to obtain
 2863          * a pointer to the group record after m_append(), in case a new
 2864          * mbuf was allocated.
 2865          * Only append sources which are in-mode at t1. If we are
 2866          * transitioning to MCAST_UNDEFINED state on the group, do not
 2867          * include source entries.
 2868          * Only report recorded sources in our filter set when responding
 2869          * to a group-source query.
 2870          */
 2871         if (record_has_sources) {
 2872                 if (m == m0) {
 2873                         md = m_last(m);
 2874                         pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) +
 2875                             md->m_len - nbytes);
 2876                 } else {
 2877                         md = m_getptr(m, 0, &off);
 2878                         pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) +
 2879                             off);
 2880                 }
 2881                 msrcs = 0;
 2882                 RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, nims) {
 2883                         CTR2(KTR_IGMPV3, "%s: visit node 0x%08x", __func__,
 2884                             ims->ims_haddr);
 2885                         now = ims_get_mode(inm, ims, 1);
 2886                         CTR2(KTR_IGMPV3, "%s: node is %d", __func__, now);
 2887                         if ((now != mode) ||
 2888                             (now == mode && mode == MCAST_UNDEFINED)) {
 2889                                 CTR1(KTR_IGMPV3, "%s: skip node", __func__);
 2890                                 continue;
 2891                         }
 2892                         if (is_source_query && ims->ims_stp == 0) {
 2893                                 CTR1(KTR_IGMPV3, "%s: skip unrecorded node",
 2894                                     __func__);
 2895                                 continue;
 2896                         }
 2897                         CTR1(KTR_IGMPV3, "%s: append node", __func__);
 2898                         naddr = htonl(ims->ims_haddr);
 2899                         if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
 2900                                 if (m != m0)
 2901                                         m_freem(m);
 2902                                 CTR1(KTR_IGMPV3, "%s: m_append() failed.",
 2903                                     __func__);
 2904                                 return (-ENOMEM);
 2905                         }
 2906                         nbytes += sizeof(in_addr_t);
 2907                         ++msrcs;
 2908                         if (msrcs == m0srcs)
 2909                                 break;
 2910                 }
 2911                 CTR2(KTR_IGMPV3, "%s: msrcs is %d this packet", __func__,
 2912                     msrcs);
 2913                 pig->ig_numsrc = htons(msrcs);
 2914                 nbytes += (msrcs * sizeof(in_addr_t));
 2915         }
 2916 
 2917         if (is_source_query && msrcs == 0) {
 2918                 CTR1(KTR_IGMPV3, "%s: no recorded sources to report", __func__);
 2919                 if (m != m0)
 2920                         m_freem(m);
 2921                 return (0);
 2922         }
 2923 
 2924         /*
 2925          * We are good to go with first packet.
 2926          */
 2927         if (m != m0) {
 2928                 CTR1(KTR_IGMPV3, "%s: enqueueing first packet", __func__);
 2929                 m->m_pkthdr.vt_nrecs = 1;
 2930                 mbufq_enqueue(mq, m);
 2931         } else
 2932                 m->m_pkthdr.vt_nrecs++;
 2933 
 2934         /*
 2935          * No further work needed if no source list in packet(s).
 2936          */
 2937         if (!record_has_sources)
 2938                 return (nbytes);
 2939 
 2940         /*
 2941          * Whilst sources remain to be announced, we need to allocate
 2942          * a new packet and fill out as many sources as will fit.
 2943          * Always try for a cluster first.
 2944          */
 2945         while (nims != NULL) {
 2946                 if (mbufq_full(mq)) {
 2947                         CTR1(KTR_IGMPV3, "%s: outbound queue full", __func__);
 2948                         return (-ENOMEM);
 2949                 }
 2950                 m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 2951                 if (m)
 2952                         m->m_data += IGMP_LEADINGSPACE;
 2953                 if (m == NULL) {
 2954                         m = m_gethdr(M_NOWAIT, MT_DATA);
 2955                         if (m)
 2956                                 M_ALIGN(m, IGMP_LEADINGSPACE);
 2957                 }
 2958                 if (m == NULL)
 2959                         return (-ENOMEM);
 2960                 igmp_save_context(m, ifp);
 2961                 md = m_getptr(m, 0, &off);
 2962                 pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) + off);
 2963                 CTR1(KTR_IGMPV3, "%s: allocated next packet", __func__);
 2964 
 2965                 if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
 2966                         if (m != m0)
 2967                                 m_freem(m);
 2968                         CTR1(KTR_IGMPV3, "%s: m_append() failed.", __func__);
 2969                         return (-ENOMEM);
 2970                 }
 2971                 m->m_pkthdr.vt_nrecs = 1;
 2972                 nbytes += sizeof(struct igmp_grouprec);
 2973 
 2974                 m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
 2975                     sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
 2976 
 2977                 msrcs = 0;
 2978                 RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
 2979                         CTR2(KTR_IGMPV3, "%s: visit node 0x%08x", __func__,
 2980                             ims->ims_haddr);
 2981                         now = ims_get_mode(inm, ims, 1);
 2982                         if ((now != mode) ||
 2983                             (now == mode && mode == MCAST_UNDEFINED)) {
 2984                                 CTR1(KTR_IGMPV3, "%s: skip node", __func__);
 2985                                 continue;
 2986                         }
 2987                         if (is_source_query && ims->ims_stp == 0) {
 2988                                 CTR1(KTR_IGMPV3, "%s: skip unrecorded node",
 2989                                     __func__);
 2990                                 continue;
 2991                         }
 2992                         CTR1(KTR_IGMPV3, "%s: append node", __func__);
 2993                         naddr = htonl(ims->ims_haddr);
 2994                         if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
 2995                                 if (m != m0)
 2996                                         m_freem(m);
 2997                                 CTR1(KTR_IGMPV3, "%s: m_append() failed.",
 2998                                     __func__);
 2999                                 return (-ENOMEM);
 3000                         }
 3001                         ++msrcs;
 3002                         if (msrcs == m0srcs)
 3003                                 break;
 3004                 }
 3005                 pig->ig_numsrc = htons(msrcs);
 3006                 nbytes += (msrcs * sizeof(in_addr_t));
 3007 
 3008                 CTR1(KTR_IGMPV3, "%s: enqueueing next packet", __func__);
 3009                 mbufq_enqueue(mq, m);
 3010         }
 3011 
 3012         return (nbytes);
 3013 }
 3014 
 /*
  * Bitmask used to track which record-type passes have been completed.
  * Each enumerator carries the same value as the per-source filter mode
  * named beside it, so a filter mode can be cast directly to rectype_t.
  */
 typedef enum {
         REC_NONE  = 0,                          /* MCAST_UNDEFINED */
         REC_ALLOW = 1 << 0,                     /* MCAST_INCLUDE */
         REC_BLOCK = 1 << 1,                     /* MCAST_EXCLUDE */
         REC_FULL  = REC_BLOCK | REC_ALLOW       /* both types set */
 } rectype_t;
 3026 
 3027 /*
 3028  * Enqueue an IGMPv3 filter list change to the given output queue.
 3029  *
 3030  * Source list filter state is held in an RB-tree. When the filter list
 3031  * for a group is changed without changing its mode, we need to compute
 3032  * the deltas between T0 and T1 for each source in the filter set,
 3033  * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
 3034  *
 3035  * As we may potentially queue two record types, and the entire R-B tree
 3036  * needs to be walked at once, we break this out into its own function
 3037  * so we can generate a tightly packed queue of packets.
 3038  *
 3039  * XXX This could be written to only use one tree walk, although that makes
 3040  * serializing into the mbuf chains a bit harder. For now we do two walks
 3041  * which makes things easier on us, and it may or may not be harder on
 3042  * the L2 cache.
 3043  *
 3044  * If successful the size of all data appended to the queue is returned,
 3045  * otherwise an error code less than zero is returned, or zero if
 3046  * no record(s) were appended.
       *
       * mq:  output queue; records are appended to its tail packet when there
       *      is room, otherwise a new packet is allocated and enqueued.
       * inm: group whose source filter deltas between t0 and t1 are encoded.
       *
       * The only error returned by the code below is -ENOMEM (mbuf allocation
       * or m_append() failure).
 3047  */
 3048 static int
 3049 igmp_v3_enqueue_filter_change(struct mbufq *mq, struct in_multi *inm)
 3050 {
              /* Smallest useful record: a group record header plus one source. */
 3051         static const int MINRECLEN =
 3052             sizeof(struct igmp_grouprec) + sizeof(in_addr_t);
 3053         struct ifnet            *ifp;
 3054         struct igmp_grouprec     ig;
 3055         struct igmp_grouprec    *pig;
 3056         struct ip_msource       *ims, *nims;
 3057         struct mbuf             *m, *m0, *md;
 3058         in_addr_t                naddr;
 3059         int                      m0srcs, nbytes, npbytes, off, rsrcs, schanged;
 3060 #ifdef KTR
 3061         int                      nallow, nblock;
 3062 #endif
 3063         uint8_t                  mode, now, then;
 3064         rectype_t                crt, drt, nrt;
 3065 
 3066         IN_MULTI_LIST_LOCK_ASSERT();
 3067 
              /*
               * Nothing to encode when the group has no source entries, or
               * when iss_asm is non-zero at both t0 and t1 (presumably
               * any-source listeners on both sides -- TODO confirm).
               */
 3068         if (inm->inm_nsrc == 0 ||
 3069             (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0))
 3070                 return (0);
 3071 
 3072         ifp = inm->inm_ifp;                     /* interface */
 3073         mode = inm->inm_st[1].iss_fmode;        /* filter mode at t1 */
 3074         crt = REC_NONE; /* current group record type */
 3075         drt = REC_NONE; /* mask of completed group record types */
 3076         nrt = REC_NONE; /* record type for current node */
 3077         m0srcs = 0;     /* # source which will fit in current mbuf chain */
 3078         nbytes = 0;     /* # of bytes appended to group's state-change queue */
 3079         npbytes = 0;    /* # of bytes appended this packet */
 3080         rsrcs = 0;      /* # sources encoded in current record */
 3081         schanged = 0;   /* # nodes encoded in overall filter change */
 3082 #ifdef KTR
 3083         nallow = 0;     /* # of source entries in ALLOW_NEW */
 3084         nblock = 0;     /* # of source entries in BLOCK_OLD */
 3085 #endif
 3086         nims = NULL;    /* next tree node pointer */
 3087 
 3088         /*
 3089          * For each possible filter record mode.
 3090          * The first kind of source we encounter tells us which
 3091          * is the first kind of record we start appending.
 3092          * If a node transitioned to UNDEFINED at t1, its mode is treated
 3093          * as the inverse of the group's filter mode.
 3094          */
 3095         while (drt != REC_FULL) {
 3096                 do {
                              /*
                               * Reuse the queue's tail packet when it can take
                               * one more record and still has room for a minimal
                               * record within the MTU budget; otherwise start a
                               * fresh packet.
                               */
 3097                         m0 = mbufq_last(mq);
 3098                         if (m0 != NULL &&
 3099                             (m0->m_pkthdr.vt_nrecs + 1 <=
 3100                              IGMP_V3_REPORT_MAXRECS) &&
 3101                             (m0->m_pkthdr.len + MINRECLEN) <
 3102                              (ifp->if_mtu - IGMP_LEADINGSPACE)) {
 3103                                 m = m0;
                                      /*
                                       * NOTE(review): this capacity omits the
                                       * IGMP_LEADINGSPACE term used by the bound
                                       * check above and by the new-packet branch,
                                       * and npbytes is not reset on this path --
                                       * confirm both are intended.
                                       */
 3104                                 m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
 3105                                             sizeof(struct igmp_grouprec)) /
 3106                                     sizeof(in_addr_t);
 3107                                 CTR1(KTR_IGMPV3,
 3108                                     "%s: use previous packet", __func__);
 3109                         } else {
                                      /*
                                       * Try m_getcl() first and fall back to
                                       * m_gethdr(); either way IGMP_LEADINGSPACE
                                       * bytes are kept free ahead of the data.
                                       */
 3110                                 m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 3111                                 if (m)
 3112                                         m->m_data += IGMP_LEADINGSPACE;
 3113                                 if (m == NULL) {
 3114                                         m = m_gethdr(M_NOWAIT, MT_DATA);
 3115                                         if (m)
 3116                                                 M_ALIGN(m, IGMP_LEADINGSPACE);
 3117                                 }
 3118                                 if (m == NULL) {
 3119                                         CTR1(KTR_IGMPV3,
 3120                                             "%s: m_get*() failed", __func__);
 3121                                         return (-ENOMEM);
 3122                                 }
 3123                                 m->m_pkthdr.vt_nrecs = 0;
 3124                                 igmp_save_context(m, ifp);
 3125                                 m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
 3126                                     sizeof(struct igmp_grouprec)) /
 3127                                     sizeof(in_addr_t);
 3128                                 npbytes = 0;
 3129                                 CTR1(KTR_IGMPV3,
 3130                                     "%s: allocated new packet", __func__);
 3131                         }
 3132                         /*
 3133                          * Append the IGMP group record header to the
 3134                          * current packet's data area.
 3135                          * Recalculate pointer to free space for next
 3136                          * group record, in case m_append() allocated
 3137                          * a new mbuf or cluster.
 3138                          */
 3139                         memset(&ig, 0, sizeof(ig));
 3140                         ig.ig_group = inm->inm_addr;
 3141                         if (!m_append(m, sizeof(ig), (void *)&ig)) {
                                      /* Only free packets we allocated here. */
 3142                                 if (m != m0)
 3143                                         m_freem(m);
 3144                                 CTR1(KTR_IGMPV3,
 3145                                     "%s: m_append() failed", __func__);
 3146                                 return (-ENOMEM);
 3147                         }
 3148                         npbytes += sizeof(struct igmp_grouprec);
 3149                         if (m != m0) {
 3150                                 /* new packet; offset in chain */
 3151                                 md = m_getptr(m, npbytes -
 3152                                     sizeof(struct igmp_grouprec), &off);
 3153                                 pig = (struct igmp_grouprec *)(mtod(md,
 3154                                     uint8_t *) + off);
 3155                         } else {
 3156                                 /* current packet; offset from last append */
 3157                                 md = m_last(m);
 3158                                 pig = (struct igmp_grouprec *)(mtod(md,
 3159                                     uint8_t *) + md->m_len -
 3160                                     sizeof(struct igmp_grouprec));
 3161                         }
 3162                         /*
 3163                          * Begin walking the tree for this record type
 3164                          * pass, or continue from where we left off
 3165                          * previously if we had to allocate a new packet.
 3166                          * Only report deltas in-mode at t1.
 3167                          * We need not report included sources as allowed
 3168                          * if we are in inclusive mode on the group,
 3169                          * however the converse is not true.
 3170                          */
 3171                         rsrcs = 0;
 3172                         if (nims == NULL)
 3173                                 nims = RB_MIN(ip_msource_tree, &inm->inm_srcs);
 3174                         RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
 3175                                 CTR2(KTR_IGMPV3, "%s: visit node 0x%08x",
 3176                                     __func__, ims->ims_haddr);
 3177                                 now = ims_get_mode(inm, ims, 1);
 3178                                 then = ims_get_mode(inm, ims, 0);
 3179                                 CTR3(KTR_IGMPV3, "%s: mode: t0 %d, t1 %d",
 3180                                     __func__, then, now);
 3181                                 if (now == then) {
 3182                                         CTR1(KTR_IGMPV3,
 3183                                             "%s: skip unchanged", __func__);
 3184                                         continue;
 3185                                 }
 3186                                 if (mode == MCAST_EXCLUDE &&
 3187                                     now == MCAST_INCLUDE) {
 3188                                         CTR1(KTR_IGMPV3,
 3189                                             "%s: skip IN src on EX group",
 3190                                             __func__);
 3191                                         continue;
 3192                                 }
                                      /*
                                       * Derive this node's record type from its
                                       * t1 mode. The first changed node seen
                                       * fixes crt for the current pass; nodes
                                       * that need the other record type are
                                       * skipped here and left for the next pass.
                                       */
 3193                                 nrt = (rectype_t)now;
 3194                                 if (nrt == REC_NONE)
 3195                                         nrt = (rectype_t)(~mode & REC_FULL);
 3196                                 if (schanged++ == 0) {
 3197                                         crt = nrt;
 3198                                 } else if (crt != nrt)
 3199                                         continue;
 3200                                 naddr = htonl(ims->ims_haddr);
 3201                                 if (!m_append(m, sizeof(in_addr_t),
 3202                                     (void *)&naddr)) {
 3203                                         if (m != m0)
 3204                                                 m_freem(m);
 3205                                         CTR1(KTR_IGMPV3,
 3206                                             "%s: m_append() failed", __func__);
 3207                                         return (-ENOMEM);
 3208                                 }
 3209 #ifdef KTR
 3210                                 nallow += !!(crt == REC_ALLOW);
 3211                                 nblock += !!(crt == REC_BLOCK);
 3212 #endif
                                      /*
                                       * Stop once this packet's source capacity
                                       * is reached; the enclosing do/while
                                       * resumes the walk from nims.
                                       */
 3213                                 if (++rsrcs == m0srcs)
 3214                                         break;
 3215                         }
 3216                         /*
 3217                          * If we did not append any tree nodes on this
 3218                          * pass, back out of allocations.
 3219                          */
 3220                         if (rsrcs == 0) {
 3221                                 npbytes -= sizeof(struct igmp_grouprec);
 3222                                 if (m != m0) {
 3223                                         CTR1(KTR_IGMPV3,
 3224                                             "%s: m_free(m)", __func__);
 3225                                         m_freem(m);
 3226                                 } else {
 3227                                         CTR1(KTR_IGMPV3,
 3228                                             "%s: m_adj(m, -ig)", __func__);
 3229                                         m_adj(m, -((int)sizeof(
 3230                                             struct igmp_grouprec)));
 3231                                 }
 3232                                 continue;
 3233                         }
                              /* Finalise the record header now that rsrcs is known. */
 3234                         npbytes += (rsrcs * sizeof(in_addr_t));
 3235                         if (crt == REC_ALLOW)
 3236                                 pig->ig_type = IGMP_ALLOW_NEW_SOURCES;
 3237                         else if (crt == REC_BLOCK)
 3238                                 pig->ig_type = IGMP_BLOCK_OLD_SOURCES;
 3239                         pig->ig_numsrc = htons(rsrcs);
 3240                         /*
 3241                          * Count the new group record, and enqueue this
 3242                          * packet if it wasn't already queued.
 3243                          */
 3244                         m->m_pkthdr.vt_nrecs++;
 3245                         if (m != m0)
 3246                                 mbufq_enqueue(mq, m);
 3247                         nbytes += npbytes;
 3248                 } while (nims != NULL);
                      /*
                       * The pass for crt is complete: record it in drt and flip
                       * crt to the other record type for the next pass.
                       */
 3249                 drt |= crt;
 3250                 crt = (~crt & REC_FULL);
 3251         }
 3252 
              /*
               * nallow and nblock are declared only under KTR; presumably CTR3()
               * compiles away otherwise -- TODO confirm.
               */
 3253         CTR3(KTR_IGMPV3, "%s: queued %d ALLOW_NEW, %d BLOCK_OLD", __func__,
 3254             nallow, nblock);
 3255 
 3256         return (nbytes);
 3257 }
 3258 
 3259 static int
 3260 igmp_v3_merge_state_changes(struct in_multi *inm, struct mbufq *scq)
 3261 {
 3262         struct mbufq    *gq;
 3263         struct mbuf     *m;             /* pending state-change */
 3264         struct mbuf     *m0;            /* copy of pending state-change */
 3265         struct mbuf     *mt;            /* last state-change in packet */
 3266         int              docopy, domerge;
 3267         u_int            recslen;
 3268 
 3269         docopy = 0;
 3270         domerge = 0;
 3271         recslen = 0;
 3272 
 3273         IN_MULTI_LIST_LOCK_ASSERT();
 3274         IGMP_LOCK_ASSERT();
 3275 
 3276         /*
 3277          * If there are further pending retransmissions, make a writable
 3278          * copy of each queued state-change message before merging.
 3279          */
 3280         if (inm->inm_scrv > 0)
 3281                 docopy = 1;
 3282 
 3283         gq = &inm->inm_scq;
 3284 #ifdef KTR
 3285         if (mbufq_first(gq) == NULL) {
 3286                 CTR2(KTR_IGMPV3, "%s: WARNING: queue for inm %p is empty",
 3287                     __func__, inm);
 3288         }
 3289 #endif
 3290 
 3291         m = mbufq_first(gq);
 3292         while (m != NULL) {
 3293                 /*
 3294                  * Only merge the report into the current packet if
 3295                  * there is sufficient space to do so; an IGMPv3 report
 3296                  * packet may only contain 65,535 group records.
 3297                  * Always use a simple mbuf chain concatentation to do this,
 3298                  * as large state changes for single groups may have
 3299                  * allocated clusters.
 3300                  */
 3301                 domerge = 0;
 3302                 mt = mbufq_last(scq);
 3303                 if (mt != NULL) {
 3304                         recslen = m_length(m, NULL);
 3305 
 3306                         if ((mt->m_pkthdr.vt_nrecs +
 3307                             m->m_pkthdr.vt_nrecs <=
 3308                             IGMP_V3_REPORT_MAXRECS) &&
 3309                             (mt->m_pkthdr.len + recslen <=
 3310                             (inm->inm_ifp->if_mtu - IGMP_LEADINGSPACE)))
 3311                                 domerge = 1;
 3312                 }
 3313 
 3314                 if (!domerge && mbufq_full(gq)) {
 3315                         CTR2(KTR_IGMPV3,
 3316                             "%s: outbound queue full, skipping whole packet %p",
 3317                             __func__, m);
 3318                         mt = m->m_nextpkt;
 3319                         if (!docopy)
 3320                                 m_freem(m);
 3321                         m = mt;
 3322                         continue;
 3323                 }
 3324 
 3325                 if (!docopy) {
 3326                         CTR2(KTR_IGMPV3, "%s: dequeueing %p", __func__, m);
 3327                         m0 = mbufq_dequeue(gq);
 3328                         m = m0->m_nextpkt;
 3329                 } else {
 3330                         CTR2(KTR_IGMPV3, "%s: copying %p", __func__, m);
 3331                         m0 = m_dup(m, M_NOWAIT);
 3332                         if (m0 == NULL)
 3333                                 return (ENOMEM);
 3334                         m0->m_nextpkt = NULL;
 3335                         m = m->m_nextpkt;
 3336                 }
 3337 
 3338                 if (!domerge) {
 3339                         CTR3(KTR_IGMPV3, "%s: queueing %p to scq %p)",
 3340                             __func__, m0, scq);
 3341                         mbufq_enqueue(scq, m0);
 3342                 } else {
 3343                         struct mbuf *mtl;       /* last mbuf of packet mt */
 3344 
 3345                         CTR3(KTR_IGMPV3, "%s: merging %p with scq tail %p)",
 3346                             __func__, m0, mt);
 3347 
 3348                         mtl = m_last(mt);
 3349                         m0->m_flags &= ~M_PKTHDR;
 3350                         mt->m_pkthdr.len += recslen;
 3351                         mt->m_pkthdr.vt_nrecs +=
 3352                             m0->m_pkthdr.vt_nrecs;
 3353 
 3354                         mtl->m_next = m0;
 3355                 }
 3356         }
 3357 
 3358         return (0);
 3359 }
 3360 
 3361 /*
 3362  * Respond to a pending IGMPv3 General Query.
 3363  */
 3364 static void
 3365 igmp_v3_dispatch_general_query(struct igmp_ifsoftc *igi)
 3366 {
 3367         struct ifmultiaddr      *ifma;
 3368         struct ifnet            *ifp;
 3369         struct in_multi         *inm;
 3370         int                      retval __unused, loop;
 3371 
 3372         IN_MULTI_LIST_LOCK_ASSERT();
 3373         IGMP_LOCK_ASSERT();
 3374         NET_EPOCH_ASSERT();
 3375 
 3376         KASSERT(igi->igi_version == IGMP_VERSION_3,
 3377             ("%s: called when version %d", __func__, igi->igi_version));
 3378 
 3379         /*
 3380          * Check that there are some packets queued. If so, send them first.
 3381          * For large number of groups the reply to general query can take
 3382          * many packets, we should finish sending them before starting of
 3383          * queuing the new reply.
 3384          */
 3385         if (mbufq_len(&igi->igi_gq) != 0)
 3386                 goto send;
 3387 
 3388         ifp = igi->igi_ifp;
 3389 
 3390         CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 3391                 inm = inm_ifmultiaddr_get_inm(ifma);
 3392                 if (inm == NULL)
 3393                         continue;
 3394                 KASSERT(ifp == inm->inm_ifp,
 3395                     ("%s: inconsistent ifp", __func__));
 3396 
 3397                 switch (inm->inm_state) {
 3398                 case IGMP_NOT_MEMBER:
 3399                 case IGMP_SILENT_MEMBER:
 3400                         break;
 3401                 case IGMP_REPORTING_MEMBER:
 3402                 case IGMP_IDLE_MEMBER:
 3403                 case IGMP_LAZY_MEMBER:
 3404                 case IGMP_SLEEPING_MEMBER:
 3405                 case IGMP_AWAKENING_MEMBER:
 3406                         inm->inm_state = IGMP_REPORTING_MEMBER;
 3407                         retval = igmp_v3_enqueue_group_record(&igi->igi_gq,
 3408                             inm, 0, 0, 0);
 3409                         CTR2(KTR_IGMPV3, "%s: enqueue record = %d",
 3410                             __func__, retval);
 3411                         break;
 3412                 case IGMP_G_QUERY_PENDING_MEMBER:
 3413                 case IGMP_SG_QUERY_PENDING_MEMBER:
 3414                 case IGMP_LEAVING_MEMBER:
 3415                         break;
 3416                 }
 3417         }
 3418 
 3419 send:
 3420         loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
 3421         igmp_dispatch_queue(&igi->igi_gq, IGMP_MAX_RESPONSE_BURST, loop);
 3422 
 3423         /*
 3424          * Slew transmission of bursts over 500ms intervals.
 3425          */
 3426         if (mbufq_first(&igi->igi_gq) != NULL) {
 3427                 igi->igi_v3_timer = 1 + IGMP_RANDOM_DELAY(
 3428                     IGMP_RESPONSE_BURST_INTERVAL);
 3429                 V_interface_timers_running = 1;
 3430         }
 3431 }
 3432 
 3433 /*
 3434  * Transmit the next pending IGMP message in the output queue.
 3435  *
 3436  * We get called from netisr_processqueue(). A mutex private to igmpoq
 3437  * will be acquired and released around this routine.
 3438  *
 3439  * VIMAGE: Needs to store/restore vnet pointer on a per-mbuf-chain basis.
 3440  * MRT: Nothing needs to be done, as IGMP traffic is always local to
 3441  * a link and uses a link-scope multicast address.
 3442  */
 3443 static void
 3444 igmp_intr(struct mbuf *m)
 3445 {
 3446         struct ip_moptions       imo;
 3447         struct ifnet            *ifp;
 3448         struct mbuf             *ipopts, *m0;
 3449         int                      error;
 3450         uint32_t                 ifindex;
 3451 
 3452         CTR2(KTR_IGMPV3, "%s: transmit %p", __func__, m);
 3453 
 3454         /*
 3455          * Set VNET image pointer from enqueued mbuf chain
 3456          * before doing anything else. Whilst we use interface
 3457          * indexes to guard against interface detach, they are
 3458          * unique to each VIMAGE and must be retrieved.
 3459          */
 3460         CURVNET_SET((struct vnet *)(m->m_pkthdr.PH_loc.ptr));
 3461         ifindex = igmp_restore_context(m);
 3462 
 3463         /*
 3464          * Check if the ifnet still exists. This limits the scope of
 3465          * any race in the absence of a global ifp lock for low cost
 3466          * (an array lookup).
 3467          */
 3468         ifp = ifnet_byindex(ifindex);
 3469         if (ifp == NULL) {
 3470                 CTR3(KTR_IGMPV3, "%s: dropped %p as ifindex %u went away.",
 3471                     __func__, m, ifindex);
 3472                 m_freem(m);
 3473                 IPSTAT_INC(ips_noroute);
 3474                 goto out;
 3475         }
 3476 
 3477         ipopts = V_igmp_sendra ? m_raopt : NULL;
 3478 
 3479         imo.imo_multicast_ttl  = 1;
 3480         imo.imo_multicast_vif  = -1;
 3481         imo.imo_multicast_loop = (V_ip_mrouter != NULL);
 3482 
 3483         /*
 3484          * If the user requested that IGMP traffic be explicitly
 3485          * redirected to the loopback interface (e.g. they are running a
 3486          * MANET interface and the routing protocol needs to see the
 3487          * updates), handle this now.
 3488          */
 3489         if (m->m_flags & M_IGMP_LOOP)
 3490                 imo.imo_multicast_ifp = V_loif;
 3491         else
 3492                 imo.imo_multicast_ifp = ifp;
 3493 
 3494         if (m->m_flags & M_IGMPV2) {
 3495                 m0 = m;
 3496         } else {
 3497                 m0 = igmp_v3_encap_report(ifp, m);
 3498                 if (m0 == NULL) {
 3499                         CTR2(KTR_IGMPV3, "%s: dropped %p", __func__, m);
 3500                         m_freem(m);
 3501                         IPSTAT_INC(ips_odropped);
 3502                         goto out;
 3503                 }
 3504         }
 3505 
 3506         igmp_scrub_context(m0);
 3507         m_clrprotoflags(m);
 3508         m0->m_pkthdr.rcvif = V_loif;
 3509 #ifdef MAC
 3510         mac_netinet_igmp_send(ifp, m0);
 3511 #endif
 3512         error = ip_output(m0, ipopts, NULL, 0, &imo, NULL);
 3513         if (error) {
 3514                 CTR3(KTR_IGMPV3, "%s: ip_output(%p) = %d", __func__, m0, error);
 3515                 goto out;
 3516         }
 3517 
 3518         IGMPSTAT_INC(igps_snd_reports);
 3519 
 3520 out:
 3521         /*
 3522          * We must restore the existing vnet pointer before
 3523          * continuing as we are run from netisr context.
 3524          */
 3525         CURVNET_RESTORE();
 3526 }
 3527 
 3528 /*
 3529  * Encapsulate an IGMPv3 report.
 3530  *
 3531  * The internal mbuf flag M_IGMPV3_HDR is used to indicate that the mbuf
 3532  * chain has already had its IP/IGMPv3 header prepended. In this case
 3533  * the function will not attempt to prepend; the lengths and checksums
 3534  * will however be re-computed.
 3535  *
 3536  * Returns a pointer to the new mbuf chain head, or NULL if the
 3537  * allocation failed.
 3538  */
 3539 static struct mbuf *
 3540 igmp_v3_encap_report(struct ifnet *ifp, struct mbuf *m)
 3541 {
 3542         struct igmp_report      *igmp;
 3543         struct ip               *ip;
 3544         int                      hdrlen, igmpreclen;
 3545 
 3546         KASSERT((m->m_flags & M_PKTHDR),
 3547             ("%s: mbuf chain %p is !M_PKTHDR", __func__, m));
 3548 
 3549         igmpreclen = m_length(m, NULL);
 3550         hdrlen = sizeof(struct ip) + sizeof(struct igmp_report);
 3551 
 3552         if (m->m_flags & M_IGMPV3_HDR) {
 3553                 igmpreclen -= hdrlen;
 3554         } else {
 3555                 M_PREPEND(m, hdrlen, M_NOWAIT);
 3556                 if (m == NULL)
 3557                         return (NULL);
 3558                 m->m_flags |= M_IGMPV3_HDR;
 3559         }
 3560 
 3561         CTR2(KTR_IGMPV3, "%s: igmpreclen is %d", __func__, igmpreclen);
 3562 
 3563         m->m_data += sizeof(struct ip);
 3564         m->m_len -= sizeof(struct ip);
 3565 
 3566         igmp = mtod(m, struct igmp_report *);
 3567         igmp->ir_type = IGMP_v3_HOST_MEMBERSHIP_REPORT;
 3568         igmp->ir_rsv1 = 0;
 3569         igmp->ir_rsv2 = 0;
 3570         igmp->ir_numgrps = htons(m->m_pkthdr.vt_nrecs);
 3571         igmp->ir_cksum = 0;
 3572         igmp->ir_cksum = in_cksum(m, sizeof(struct igmp_report) + igmpreclen);
 3573         m->m_pkthdr.vt_nrecs = 0;
 3574 
 3575         m->m_data -= sizeof(struct ip);
 3576         m->m_len += sizeof(struct ip);
 3577 
 3578         ip = mtod(m, struct ip *);
 3579         ip->ip_tos = IPTOS_PREC_INTERNETCONTROL;
 3580         ip->ip_len = htons(hdrlen + igmpreclen);
 3581         ip->ip_off = htons(IP_DF);
 3582         ip->ip_p = IPPROTO_IGMP;
 3583         ip->ip_sum = 0;
 3584 
 3585         ip->ip_src.s_addr = INADDR_ANY;
 3586 
 3587         if (m->m_flags & M_IGMP_LOOP) {
 3588                 struct in_ifaddr *ia;
 3589 
 3590                 IFP_TO_IA(ifp, ia);
 3591                 if (ia != NULL)
 3592                         ip->ip_src = ia->ia_addr.sin_addr;
 3593         }
 3594 
 3595         ip->ip_dst.s_addr = htonl(INADDR_ALLRPTS_GROUP);
 3596 
 3597         return (m);
 3598 }
 3599 
 #ifdef KTR
 /*
  * Translate an IGMPv3 group record type into a short printable name.
  * Only built when KTR is defined.  Values that match none of the six
  * known record types yield "unknown".
  */
 static char *
 igmp_rec_type_to_str(const int type)
 {
         char *name;

         switch (type) {
         case IGMP_CHANGE_TO_EXCLUDE_MODE:
                 name = "TO_EX";
                 break;
         case IGMP_CHANGE_TO_INCLUDE_MODE:
                 name = "TO_IN";
                 break;
         case IGMP_MODE_IS_EXCLUDE:
                 name = "MODE_EX";
                 break;
         case IGMP_MODE_IS_INCLUDE:
                 name = "MODE_IN";
                 break;
         case IGMP_ALLOW_NEW_SOURCES:
                 name = "ALLOW_NEW";
                 break;
         case IGMP_BLOCK_OLD_SOURCES:
                 name = "BLOCK_OLD";
                 break;
         default:
                 name = "unknown";
                 break;
         }
         return (name);
 }
 #endif
 3630 
 #ifdef VIMAGE
 /*
  * Per-vnet initialisation hook: calls netisr_register_vnet() on igmp_nh.
  * Hooked in through VNET_SYSINIT at SI_SUB_PROTO_MC.
  */
 static void
 vnet_igmp_init(const void *unused __unused)
 {
         netisr_register_vnet(&igmp_nh);
 }
 VNET_SYSINIT(vnet_igmp_init, SI_SUB_PROTO_MC, SI_ORDER_ANY, vnet_igmp_init,
     NULL);

 /*
  * Per-vnet teardown hook: calls netisr_unregister_vnet() on igmp_nh.
  * Hooked in through VNET_SYSUNINIT at SI_SUB_PROTO_MC.
  */
 static void
 vnet_igmp_uninit(const void *unused __unused)
 {
         /* This can happen when we shutdown the entire network stack. */
         CTR1(KTR_IGMPV3, "%s: tearing down", __func__);
         netisr_unregister_vnet(&igmp_nh);
 }
 VNET_SYSUNINIT(vnet_igmp_uninit, SI_SUB_PROTO_MC, SI_ORDER_ANY,
     vnet_igmp_uninit, NULL);
 #endif
 3653 
 #ifdef DDB
 /*
  * DDB command "show igi_list <addr>".
  *
  * Treats addr as a pointer to a LIST_HEAD of igmp_ifsoftc entries and
  * prints the scalar fields of each entry.  Without an address only the
  * usage line is printed.
  */
 DB_SHOW_COMMAND(igi_list, db_show_igi_list)
 {
         struct igmp_ifsoftc *cur, *next;
         LIST_HEAD(_igi_list, igmp_ifsoftc) *head;

         if (have_addr) {
                 head = (struct _igi_list *)addr;

                 LIST_FOREACH_SAFE(cur, head, igi_link, next) {
                         db_printf("igmp_ifsoftc %p:\n", cur);
                         db_printf("    ifp %p\n", cur->igi_ifp);
                         db_printf("    version %u\n", cur->igi_version);
                         db_printf("    v1_timer %u\n", cur->igi_v1_timer);
                         db_printf("    v2_timer %u\n", cur->igi_v2_timer);
                         db_printf("    v3_timer %u\n", cur->igi_v3_timer);
                         db_printf("    flags %#x\n", cur->igi_flags);
                         db_printf("    rv %u\n", cur->igi_rv);
                         db_printf("    qi %u\n", cur->igi_qi);
                         db_printf("    qri %u\n", cur->igi_qri);
                         db_printf("    uri %u\n", cur->igi_uri);
                         /* struct mbufq    igi_gq; (not printed) */
                         db_printf("\n");
                 }
         } else {
                 db_printf("usage: show igi_list <addr>\n");
         }
 }
 #endif
 3683 
 3684 static int
 3685 igmp_modevent(module_t mod, int type, void *unused __unused)
 3686 {
 3687 
 3688         switch (type) {
 3689         case MOD_LOAD:
 3690                 CTR1(KTR_IGMPV3, "%s: initializing", __func__);
 3691                 IGMP_LOCK_INIT();
 3692                 m_raopt = igmp_ra_alloc();
 3693                 netisr_register(&igmp_nh);
 3694                 callout_init(&igmpslow_callout, 1);
 3695                 callout_reset(&igmpslow_callout, hz / IGMP_SLOWHZ,
 3696                     igmp_slowtimo, NULL);
 3697                 callout_init(&igmpfast_callout, 1);
 3698                 callout_reset(&igmpfast_callout, hz / IGMP_FASTHZ,
 3699                     igmp_fasttimo, NULL);
 3700                 break;
 3701         case MOD_UNLOAD:
 3702                 CTR1(KTR_IGMPV3, "%s: tearing down", __func__);
 3703                 netisr_unregister(&igmp_nh);
 3704                 m_free(m_raopt);
 3705                 m_raopt = NULL;
 3706                 IGMP_LOCK_DESTROY();
 3707                 break;
 3708         default:
 3709                 return (EOPNOTSUPP);
 3710         }
 3711         return (0);
 3712 }
 3713 
 3714 static moduledata_t igmp_mod = {
 3715     "igmp",
 3716     igmp_modevent,
 3717     0
 3718 };
 3719 DECLARE_MODULE(igmp, igmp_mod, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE);

Cache object: da87afc1750e95526bc8529c38fdf1ab


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference and was automatically generated using a modified version of the LXR engine.