The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/in_mcast.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2007-2009 Bruce Simpson.
    3  * Copyright (c) 2005 Robert N. M. Watson.
    4  * All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  * 3. The name of the author may not be used to endorse or promote
   15  *    products derived from this software without specific prior written
   16  *    permission.
   17  *
   18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   28  * SUCH DAMAGE.
   29  */
   30 
   31 /*
   32  * IPv4 multicast socket, group, and socket option processing module.
   33  */
   34 
   35 #include <sys/cdefs.h>
   36 __FBSDID("$FreeBSD: releng/8.0/sys/netinet/in_mcast.c 197280 2009-09-17 13:41:59Z bms $");
   37 
   38 #include <sys/param.h>
   39 #include <sys/systm.h>
   40 #include <sys/kernel.h>
   41 #include <sys/malloc.h>
   42 #include <sys/mbuf.h>
   43 #include <sys/protosw.h>
   44 #include <sys/socket.h>
   45 #include <sys/socketvar.h>
   46 #include <sys/protosw.h>
   47 #include <sys/sysctl.h>
   48 #include <sys/ktr.h>
   49 #include <sys/tree.h>
   50 
   51 #include <net/if.h>
   52 #include <net/if_dl.h>
   53 #include <net/route.h>
   54 #include <net/vnet.h>
   55 
   56 #include <netinet/in.h>
   57 #include <netinet/in_systm.h>
   58 #include <netinet/in_pcb.h>
   59 #include <netinet/in_var.h>
   60 #include <netinet/ip_var.h>
   61 #include <netinet/igmp_var.h>
   62 
   63 #ifndef KTR_IGMPV3
   64 #define KTR_IGMPV3 KTR_INET
   65 #endif
   66 
   67 #ifndef __SOCKUNION_DECLARED
   68 union sockunion {
   69         struct sockaddr_storage ss;
   70         struct sockaddr         sa;
   71         struct sockaddr_dl      sdl;
   72         struct sockaddr_in      sin;
   73 };
   74 typedef union sockunion sockunion_t;
   75 #define __SOCKUNION_DECLARED
   76 #endif /* __SOCKUNION_DECLARED */
   77 
   78 static MALLOC_DEFINE(M_INMFILTER, "in_mfilter",
   79     "IPv4 multicast PCB-layer source filter");
   80 static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group");
   81 static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options");
   82 static MALLOC_DEFINE(M_IPMSOURCE, "ip_msource",
   83     "IPv4 multicast IGMP-layer source filter");
   84 
   85 /*
   86  * Locking:
   87  * - Lock order is: Giant, INP_WLOCK, IN_MULTI_LOCK, IGMP_LOCK, IF_ADDR_LOCK.
   88  * - The IF_ADDR_LOCK is implicitly taken by inm_lookup() earlier, however
   89  *   it can be taken by code in net/if.c also.
   90  * - ip_moptions and in_mfilter are covered by the INP_WLOCK.
   91  *
   92  * struct in_multi is covered by IN_MULTI_LOCK. There isn't strictly
   93  * any need for in_multi itself to be virtualized -- it is bound to an ifp
   94  * anyway no matter what happens.
   95  */
   96 struct mtx in_multi_mtx;
   97 MTX_SYSINIT(in_multi_mtx, &in_multi_mtx, "in_multi_mtx", MTX_DEF);
   98 
   99 /*
  100  * Functions with non-static linkage defined in this file should be
  101  * declared in in_var.h:
  102  *  imo_multi_filter()
  103  *  in_addmulti()
  104  *  in_delmulti()
  105  *  in_joingroup()
  106  *  in_joingroup_locked()
  107  *  in_leavegroup()
  108  *  in_leavegroup_locked()
  109  * and ip_var.h:
  110  *  inp_freemoptions()
  111  *  inp_getmoptions()
  112  *  inp_setmoptions()
  113  *
  114  * XXX: Both carp and pf need to use the legacy (*,G) KPIs in_addmulti()
  115  * and in_delmulti().
  116  */
  117 static void     imf_commit(struct in_mfilter *);
  118 static int      imf_get_source(struct in_mfilter *imf,
  119                     const struct sockaddr_in *psin,
  120                     struct in_msource **);
  121 static struct in_msource *
  122                 imf_graft(struct in_mfilter *, const uint8_t,
  123                     const struct sockaddr_in *);
  124 static void     imf_leave(struct in_mfilter *);
  125 static int      imf_prune(struct in_mfilter *, const struct sockaddr_in *);
  126 static void     imf_purge(struct in_mfilter *);
  127 static void     imf_rollback(struct in_mfilter *);
  128 static void     imf_reap(struct in_mfilter *);
  129 static int      imo_grow(struct ip_moptions *);
  130 static size_t   imo_match_group(const struct ip_moptions *,
  131                     const struct ifnet *, const struct sockaddr *);
  132 static struct in_msource *
  133                 imo_match_source(const struct ip_moptions *, const size_t,
  134                     const struct sockaddr *);
  135 static void     ims_merge(struct ip_msource *ims,
  136                     const struct in_msource *lims, const int rollback);
  137 static int      in_getmulti(struct ifnet *, const struct in_addr *,
  138                     struct in_multi **);
  139 static int      inm_get_source(struct in_multi *inm, const in_addr_t haddr,
  140                     const int noalloc, struct ip_msource **pims);
  141 static int      inm_is_ifp_detached(const struct in_multi *);
  142 static int      inm_merge(struct in_multi *, /*const*/ struct in_mfilter *);
  143 static void     inm_purge(struct in_multi *);
  144 static void     inm_reap(struct in_multi *);
  145 static struct ip_moptions *
  146                 inp_findmoptions(struct inpcb *);
  147 static int      inp_get_source_filters(struct inpcb *, struct sockopt *);
  148 static int      inp_join_group(struct inpcb *, struct sockopt *);
  149 static int      inp_leave_group(struct inpcb *, struct sockopt *);
  150 static struct ifnet *
  151                 inp_lookup_mcast_ifp(const struct inpcb *,
  152                     const struct sockaddr_in *, const struct in_addr);
  153 static int      inp_block_unblock_source(struct inpcb *, struct sockopt *);
  154 static int      inp_set_multicast_if(struct inpcb *, struct sockopt *);
  155 static int      inp_set_source_filters(struct inpcb *, struct sockopt *);
  156 static int      sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS);
  157 
  158 SYSCTL_NODE(_net_inet_ip, OID_AUTO, mcast, CTLFLAG_RW, 0, "IPv4 multicast");
  159 
  160 static u_long in_mcast_maxgrpsrc = IP_MAX_GROUP_SRC_FILTER;
  161 SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxgrpsrc,
  162     CTLFLAG_RW | CTLFLAG_TUN, &in_mcast_maxgrpsrc, 0,
  163     "Max source filters per group");
  164 TUNABLE_ULONG("net.inet.ip.mcast.maxgrpsrc", &in_mcast_maxgrpsrc);
  165 
  166 static u_long in_mcast_maxsocksrc = IP_MAX_SOCK_SRC_FILTER;
  167 SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxsocksrc,
  168     CTLFLAG_RW | CTLFLAG_TUN, &in_mcast_maxsocksrc, 0,
  169     "Max source filters per socket");
  170 TUNABLE_ULONG("net.inet.ip.mcast.maxsocksrc", &in_mcast_maxsocksrc);
  171 
  172 int in_mcast_loop = IP_DEFAULT_MULTICAST_LOOP;
  173 SYSCTL_INT(_net_inet_ip_mcast, OID_AUTO, loop, CTLFLAG_RW | CTLFLAG_TUN,
  174     &in_mcast_loop, 0, "Loopback multicast datagrams by default");
  175 TUNABLE_INT("net.inet.ip.mcast.loop", &in_mcast_loop);
  176 
  177 SYSCTL_NODE(_net_inet_ip_mcast, OID_AUTO, filters,
  178     CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters,
  179     "Per-interface stack-wide source filters");
  180 
  181 /*
  182  * Inline function which wraps assertions for a valid ifp.
  183  * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp
  184  * is detached.
  185  */
  186 static int __inline
  187 inm_is_ifp_detached(const struct in_multi *inm)
  188 {
  189         struct ifnet *ifp;
  190 
  191         KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));
  192         ifp = inm->inm_ifma->ifma_ifp;
  193         if (ifp != NULL) {
  194                 /*
  195                  * Sanity check that netinet's notion of ifp is the
  196                  * same as net's.
  197                  */
  198                 KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__));
  199         }
  200 
  201         return (ifp == NULL);
  202 }
  203 
  204 /*
  205  * Initialize an in_mfilter structure to a known state at t0, t1
  206  * with an empty source filter list.
  207  */
  208 static __inline void
  209 imf_init(struct in_mfilter *imf, const int st0, const int st1)
  210 {
  211         memset(imf, 0, sizeof(struct in_mfilter));
  212         RB_INIT(&imf->imf_sources);
  213         imf->imf_st[0] = st0;
  214         imf->imf_st[1] = st1;
  215 }
  216 
  217 /*
  218  * Resize the ip_moptions vector to the next power-of-two minus 1.
  219  * May be called with locks held; do not sleep.
  220  */
  221 static int
  222 imo_grow(struct ip_moptions *imo)
  223 {
  224         struct in_multi         **nmships;
  225         struct in_multi         **omships;
  226         struct in_mfilter        *nmfilters;
  227         struct in_mfilter        *omfilters;
  228         size_t                    idx;
  229         size_t                    newmax;
  230         size_t                    oldmax;
  231 
  232         nmships = NULL;
  233         nmfilters = NULL;
  234         omships = imo->imo_membership;
  235         omfilters = imo->imo_mfilters;
  236         oldmax = imo->imo_max_memberships;
  237         newmax = ((oldmax + 1) * 2) - 1;
  238 
  239         if (newmax <= IP_MAX_MEMBERSHIPS) {
  240                 nmships = (struct in_multi **)realloc(omships,
  241                     sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT);
  242                 nmfilters = (struct in_mfilter *)realloc(omfilters,
  243                     sizeof(struct in_mfilter) * newmax, M_INMFILTER, M_NOWAIT);
  244                 if (nmships != NULL && nmfilters != NULL) {
  245                         /* Initialize newly allocated source filter heads. */
  246                         for (idx = oldmax; idx < newmax; idx++) {
  247                                 imf_init(&nmfilters[idx], MCAST_UNDEFINED,
  248                                     MCAST_EXCLUDE);
  249                         }
  250                         imo->imo_max_memberships = newmax;
  251                         imo->imo_membership = nmships;
  252                         imo->imo_mfilters = nmfilters;
  253                 }
  254         }
  255 
  256         if (nmships == NULL || nmfilters == NULL) {
  257                 if (nmships != NULL)
  258                         free(nmships, M_IPMOPTS);
  259                 if (nmfilters != NULL)
  260                         free(nmfilters, M_INMFILTER);
  261                 return (ETOOMANYREFS);
  262         }
  263 
  264         return (0);
  265 }
  266 
  267 /*
  268  * Find an IPv4 multicast group entry for this ip_moptions instance
  269  * which matches the specified group, and optionally an interface.
  270  * Return its index into the array, or -1 if not found.
  271  */
  272 static size_t
  273 imo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp,
  274     const struct sockaddr *group)
  275 {
  276         const struct sockaddr_in *gsin;
  277         struct in_multi **pinm;
  278         int               idx;
  279         int               nmships;
  280 
  281         gsin = (const struct sockaddr_in *)group;
  282 
  283         /* The imo_membership array may be lazy allocated. */
  284         if (imo->imo_membership == NULL || imo->imo_num_memberships == 0)
  285                 return (-1);
  286 
  287         nmships = imo->imo_num_memberships;
  288         pinm = &imo->imo_membership[0];
  289         for (idx = 0; idx < nmships; idx++, pinm++) {
  290                 if (*pinm == NULL)
  291                         continue;
  292                 if ((ifp == NULL || ((*pinm)->inm_ifp == ifp)) &&
  293                     in_hosteq((*pinm)->inm_addr, gsin->sin_addr)) {
  294                         break;
  295                 }
  296         }
  297         if (idx >= nmships)
  298                 idx = -1;
  299 
  300         return (idx);
  301 }
  302 
  303 /*
  304  * Find an IPv4 multicast source entry for this imo which matches
  305  * the given group index for this socket, and source address.
  306  *
  307  * NOTE: This does not check if the entry is in-mode, merely if
  308  * it exists, which may not be the desired behaviour.
  309  */
  310 static struct in_msource *
  311 imo_match_source(const struct ip_moptions *imo, const size_t gidx,
  312     const struct sockaddr *src)
  313 {
  314         struct ip_msource        find;
  315         struct in_mfilter       *imf;
  316         struct ip_msource       *ims;
  317         const sockunion_t       *psa;
  318 
  319         KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__));
  320         KASSERT(gidx != -1 && gidx < imo->imo_num_memberships,
  321             ("%s: invalid index %d\n", __func__, (int)gidx));
  322 
  323         /* The imo_mfilters array may be lazy allocated. */
  324         if (imo->imo_mfilters == NULL)
  325                 return (NULL);
  326         imf = &imo->imo_mfilters[gidx];
  327 
  328         /* Source trees are keyed in host byte order. */
  329         psa = (const sockunion_t *)src;
  330         find.ims_haddr = ntohl(psa->sin.sin_addr.s_addr);
  331         ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
  332 
  333         return ((struct in_msource *)ims);
  334 }
  335 
  336 /*
  337  * Perform filtering for multicast datagrams on a socket by group and source.
  338  *
  339  * Returns 0 if a datagram should be allowed through, or various error codes
  340  * if the socket was not a member of the group, or the source was muted, etc.
  341  */
  342 int
  343 imo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp,
  344     const struct sockaddr *group, const struct sockaddr *src)
  345 {
  346         size_t gidx;
  347         struct in_msource *ims;
  348         int mode;
  349 
  350         KASSERT(ifp != NULL, ("%s: null ifp", __func__));
  351 
  352         gidx = imo_match_group(imo, ifp, group);
  353         if (gidx == -1)
  354                 return (MCAST_NOTGMEMBER);
  355 
  356         /*
  357          * Check if the source was included in an (S,G) join.
  358          * Allow reception on exclusive memberships by default,
  359          * reject reception on inclusive memberships by default.
  360          * Exclude source only if an in-mode exclude filter exists.
  361          * Include source only if an in-mode include filter exists.
  362          * NOTE: We are comparing group state here at IGMP t1 (now)
  363          * with socket-layer t0 (since last downcall).
  364          */
  365         mode = imo->imo_mfilters[gidx].imf_st[1];
  366         ims = imo_match_source(imo, gidx, src);
  367 
  368         if ((ims == NULL && mode == MCAST_INCLUDE) ||
  369             (ims != NULL && ims->imsl_st[0] != mode))
  370                 return (MCAST_NOTSMEMBER);
  371 
  372         return (MCAST_PASS);
  373 }
  374 
  375 /*
  376  * Find and return a reference to an in_multi record for (ifp, group),
  377  * and bump its reference count.
  378  * If one does not exist, try to allocate it, and update link-layer multicast
  379  * filters on ifp to listen for group.
  380  * Assumes the IN_MULTI lock is held across the call.
  381  * Return 0 if successful, otherwise return an appropriate error code.
  382  */
  383 static int
  384 in_getmulti(struct ifnet *ifp, const struct in_addr *group,
  385     struct in_multi **pinm)
  386 {
  387         struct sockaddr_in       gsin;
  388         struct ifmultiaddr      *ifma;
  389         struct in_ifinfo        *ii;
  390         struct in_multi         *inm;
  391         int error;
  392 
  393         IN_MULTI_LOCK_ASSERT();
  394 
  395         ii = (struct in_ifinfo *)ifp->if_afdata[AF_INET];
  396 
  397         inm = inm_lookup(ifp, *group);
  398         if (inm != NULL) {
  399                 /*
  400                  * If we already joined this group, just bump the
  401                  * refcount and return it.
  402                  */
  403                 KASSERT(inm->inm_refcount >= 1,
  404                     ("%s: bad refcount %d", __func__, inm->inm_refcount));
  405                 ++inm->inm_refcount;
  406                 *pinm = inm;
  407                 return (0);
  408         }
  409 
  410         memset(&gsin, 0, sizeof(gsin));
  411         gsin.sin_family = AF_INET;
  412         gsin.sin_len = sizeof(struct sockaddr_in);
  413         gsin.sin_addr = *group;
  414 
  415         /*
  416          * Check if a link-layer group is already associated
  417          * with this network-layer group on the given ifnet.
  418          */
  419         error = if_addmulti(ifp, (struct sockaddr *)&gsin, &ifma);
  420         if (error != 0)
  421                 return (error);
  422 
  423         /* XXX ifma_protospec must be covered by IF_ADDR_LOCK */
  424         IF_ADDR_LOCK(ifp);
  425 
  426         /*
  427          * If something other than netinet is occupying the link-layer
  428          * group, print a meaningful error message and back out of
  429          * the allocation.
  430          * Otherwise, bump the refcount on the existing network-layer
  431          * group association and return it.
  432          */
  433         if (ifma->ifma_protospec != NULL) {
  434                 inm = (struct in_multi *)ifma->ifma_protospec;
  435 #ifdef INVARIANTS
  436                 KASSERT(ifma->ifma_addr != NULL, ("%s: no ifma_addr",
  437                     __func__));
  438                 KASSERT(ifma->ifma_addr->sa_family == AF_INET,
  439                     ("%s: ifma not AF_INET", __func__));
  440                 KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__));
  441                 if (inm->inm_ifma != ifma || inm->inm_ifp != ifp ||
  442                     !in_hosteq(inm->inm_addr, *group))
  443                         panic("%s: ifma %p is inconsistent with %p (%s)",
  444                             __func__, ifma, inm, inet_ntoa(*group));
  445 #endif
  446                 ++inm->inm_refcount;
  447                 *pinm = inm;
  448                 IF_ADDR_UNLOCK(ifp);
  449                 return (0);
  450         }
  451 
  452         IF_ADDR_LOCK_ASSERT(ifp);
  453 
  454         /*
  455          * A new in_multi record is needed; allocate and initialize it.
  456          * We DO NOT perform an IGMP join as the in_ layer may need to
  457          * push an initial source list down to IGMP to support SSM.
  458          *
  459          * The initial source filter state is INCLUDE, {} as per the RFC.
  460          */
  461         inm = malloc(sizeof(*inm), M_IPMADDR, M_NOWAIT | M_ZERO);
  462         if (inm == NULL) {
  463                 if_delmulti_ifma(ifma);
  464                 IF_ADDR_UNLOCK(ifp);
  465                 return (ENOMEM);
  466         }
  467         inm->inm_addr = *group;
  468         inm->inm_ifp = ifp;
  469         inm->inm_igi = ii->ii_igmp;
  470         inm->inm_ifma = ifma;
  471         inm->inm_refcount = 1;
  472         inm->inm_state = IGMP_NOT_MEMBER;
  473 
  474         /*
  475          * Pending state-changes per group are subject to a bounds check.
  476          */
  477         IFQ_SET_MAXLEN(&inm->inm_scq, IGMP_MAX_STATE_CHANGES);
  478 
  479         inm->inm_st[0].iss_fmode = MCAST_UNDEFINED;
  480         inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
  481         RB_INIT(&inm->inm_srcs);
  482 
  483         ifma->ifma_protospec = inm;
  484 
  485         *pinm = inm;
  486 
  487         IF_ADDR_UNLOCK(ifp);
  488         return (0);
  489 }
  490 
  491 /*
  492  * Drop a reference to an in_multi record.
  493  *
  494  * If the refcount drops to 0, free the in_multi record and
  495  * delete the underlying link-layer membership.
  496  */
  497 void
  498 inm_release_locked(struct in_multi *inm)
  499 {
  500         struct ifmultiaddr *ifma;
  501 
  502         IN_MULTI_LOCK_ASSERT();
  503 
  504         CTR2(KTR_IGMPV3, "%s: refcount is %d", __func__, inm->inm_refcount);
  505 
  506         if (--inm->inm_refcount > 0) {
  507                 CTR2(KTR_IGMPV3, "%s: refcount is now %d", __func__,
  508                     inm->inm_refcount);
  509                 return;
  510         }
  511 
  512         CTR2(KTR_IGMPV3, "%s: freeing inm %p", __func__, inm);
  513 
  514         ifma = inm->inm_ifma;
  515 
  516         /* XXX this access is not covered by IF_ADDR_LOCK */
  517         CTR2(KTR_IGMPV3, "%s: purging ifma %p", __func__, ifma);
  518         KASSERT(ifma->ifma_protospec == inm,
  519             ("%s: ifma_protospec != inm", __func__));
  520         ifma->ifma_protospec = NULL;
  521 
  522         inm_purge(inm);
  523 
  524         free(inm, M_IPMADDR);
  525 
  526         if_delmulti_ifma(ifma);
  527 }
  528 
  529 /*
  530  * Clear recorded source entries for a group.
  531  * Used by the IGMP code. Caller must hold the IN_MULTI lock.
  532  * FIXME: Should reap.
  533  */
  534 void
  535 inm_clear_recorded(struct in_multi *inm)
  536 {
  537         struct ip_msource       *ims;
  538 
  539         IN_MULTI_LOCK_ASSERT();
  540 
  541         RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
  542                 if (ims->ims_stp) {
  543                         ims->ims_stp = 0;
  544                         --inm->inm_st[1].iss_rec;
  545                 }
  546         }
  547         KASSERT(inm->inm_st[1].iss_rec == 0,
  548             ("%s: iss_rec %d not 0", __func__, inm->inm_st[1].iss_rec));
  549 }
  550 
  551 /*
  552  * Record a source as pending for a Source-Group IGMPv3 query.
  553  * This lives here as it modifies the shared tree.
  554  *
  555  * inm is the group descriptor.
  556  * naddr is the address of the source to record in network-byte order.
  557  *
  558  * If the net.inet.igmp.sgalloc sysctl is non-zero, we will
  559  * lazy-allocate a source node in response to an SG query.
  560  * Otherwise, no allocation is performed. This saves some memory
  561  * with the trade-off that the source will not be reported to the
  562  * router if joined in the window between the query response and
  563  * the group actually being joined on the local host.
  564  *
  565  * VIMAGE: XXX: Currently the igmp_sgalloc feature has been removed.
  566  * This turns off the allocation of a recorded source entry if
  567  * the group has not been joined.
  568  *
  569  * Return 0 if the source didn't exist or was already marked as recorded.
  570  * Return 1 if the source was marked as recorded by this function.
  571  * Return <0 if any error occured (negated errno code).
  572  */
  573 int
  574 inm_record_source(struct in_multi *inm, const in_addr_t naddr)
  575 {
  576         struct ip_msource        find;
  577         struct ip_msource       *ims, *nims;
  578 
  579         IN_MULTI_LOCK_ASSERT();
  580 
  581         find.ims_haddr = ntohl(naddr);
  582         ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
  583         if (ims && ims->ims_stp)
  584                 return (0);
  585         if (ims == NULL) {
  586                 if (inm->inm_nsrc == in_mcast_maxgrpsrc)
  587                         return (-ENOSPC);
  588                 nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE,
  589                     M_NOWAIT | M_ZERO);
  590                 if (nims == NULL)
  591                         return (-ENOMEM);
  592                 nims->ims_haddr = find.ims_haddr;
  593                 RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims);
  594                 ++inm->inm_nsrc;
  595                 ims = nims;
  596         }
  597 
  598         /*
  599          * Mark the source as recorded and update the recorded
  600          * source count.
  601          */
  602         ++ims->ims_stp;
  603         ++inm->inm_st[1].iss_rec;
  604 
  605         return (1);
  606 }
  607 
  608 /*
  609  * Return a pointer to an in_msource owned by an in_mfilter,
  610  * given its source address.
  611  * Lazy-allocate if needed. If this is a new entry its filter state is
  612  * undefined at t0.
  613  *
  614  * imf is the filter set being modified.
  615  * haddr is the source address in *host* byte-order.
  616  *
  617  * SMPng: May be called with locks held; malloc must not block.
  618  */
  619 static int
  620 imf_get_source(struct in_mfilter *imf, const struct sockaddr_in *psin,
  621     struct in_msource **plims)
  622 {
  623         struct ip_msource        find;
  624         struct ip_msource       *ims, *nims;
  625         struct in_msource       *lims;
  626         int                      error;
  627 
  628         error = 0;
  629         ims = NULL;
  630         lims = NULL;
  631 
  632         /* key is host byte order */
  633         find.ims_haddr = ntohl(psin->sin_addr.s_addr);
  634         ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
  635         lims = (struct in_msource *)ims;
  636         if (lims == NULL) {
  637                 if (imf->imf_nsrc == in_mcast_maxsocksrc)
  638                         return (ENOSPC);
  639                 nims = malloc(sizeof(struct in_msource), M_INMFILTER,
  640                     M_NOWAIT | M_ZERO);
  641                 if (nims == NULL)
  642                         return (ENOMEM);
  643                 lims = (struct in_msource *)nims;
  644                 lims->ims_haddr = find.ims_haddr;
  645                 lims->imsl_st[0] = MCAST_UNDEFINED;
  646                 RB_INSERT(ip_msource_tree, &imf->imf_sources, nims);
  647                 ++imf->imf_nsrc;
  648         }
  649 
  650         *plims = lims;
  651 
  652         return (error);
  653 }
  654 
  655 /*
  656  * Graft a source entry into an existing socket-layer filter set,
  657  * maintaining any required invariants and checking allocations.
  658  *
  659  * The source is marked as being in the new filter mode at t1.
  660  *
  661  * Return the pointer to the new node, otherwise return NULL.
  662  */
  663 static struct in_msource *
  664 imf_graft(struct in_mfilter *imf, const uint8_t st1,
  665     const struct sockaddr_in *psin)
  666 {
  667         struct ip_msource       *nims;
  668         struct in_msource       *lims;
  669 
  670         nims = malloc(sizeof(struct in_msource), M_INMFILTER,
  671             M_NOWAIT | M_ZERO);
  672         if (nims == NULL)
  673                 return (NULL);
  674         lims = (struct in_msource *)nims;
  675         lims->ims_haddr = ntohl(psin->sin_addr.s_addr);
  676         lims->imsl_st[0] = MCAST_UNDEFINED;
  677         lims->imsl_st[1] = st1;
  678         RB_INSERT(ip_msource_tree, &imf->imf_sources, nims);
  679         ++imf->imf_nsrc;
  680 
  681         return (lims);
  682 }
  683 
  684 /*
  685  * Prune a source entry from an existing socket-layer filter set,
  686  * maintaining any required invariants and checking allocations.
  687  *
  688  * The source is marked as being left at t1, it is not freed.
  689  *
  690  * Return 0 if no error occurred, otherwise return an errno value.
  691  */
  692 static int
  693 imf_prune(struct in_mfilter *imf, const struct sockaddr_in *psin)
  694 {
  695         struct ip_msource        find;
  696         struct ip_msource       *ims;
  697         struct in_msource       *lims;
  698 
  699         /* key is host byte order */
  700         find.ims_haddr = ntohl(psin->sin_addr.s_addr);
  701         ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
  702         if (ims == NULL)
  703                 return (ENOENT);
  704         lims = (struct in_msource *)ims;
  705         lims->imsl_st[1] = MCAST_UNDEFINED;
  706         return (0);
  707 }
  708 
  709 /*
  710  * Revert socket-layer filter set deltas at t1 to t0 state.
  711  */
  712 static void
  713 imf_rollback(struct in_mfilter *imf)
  714 {
  715         struct ip_msource       *ims, *tims;
  716         struct in_msource       *lims;
  717 
  718         RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
  719                 lims = (struct in_msource *)ims;
  720                 if (lims->imsl_st[0] == lims->imsl_st[1]) {
  721                         /* no change at t1 */
  722                         continue;
  723                 } else if (lims->imsl_st[0] != MCAST_UNDEFINED) {
  724                         /* revert change to existing source at t1 */
  725                         lims->imsl_st[1] = lims->imsl_st[0];
  726                 } else {
  727                         /* revert source added t1 */
  728                         CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
  729                         RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
  730                         free(ims, M_INMFILTER);
  731                         imf->imf_nsrc--;
  732                 }
  733         }
  734         imf->imf_st[1] = imf->imf_st[0];
  735 }
  736 
  737 /*
  738  * Mark socket-layer filter set as INCLUDE {} at t1.
  739  */
  740 static void
  741 imf_leave(struct in_mfilter *imf)
  742 {
  743         struct ip_msource       *ims;
  744         struct in_msource       *lims;
  745 
  746         RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
  747                 lims = (struct in_msource *)ims;
  748                 lims->imsl_st[1] = MCAST_UNDEFINED;
  749         }
  750         imf->imf_st[1] = MCAST_INCLUDE;
  751 }
  752 
  753 /*
  754  * Mark socket-layer filter set deltas as committed.
  755  */
  756 static void
  757 imf_commit(struct in_mfilter *imf)
  758 {
  759         struct ip_msource       *ims;
  760         struct in_msource       *lims;
  761 
  762         RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
  763                 lims = (struct in_msource *)ims;
  764                 lims->imsl_st[0] = lims->imsl_st[1];
  765         }
  766         imf->imf_st[0] = imf->imf_st[1];
  767 }
  768 
  769 /*
  770  * Reap unreferenced sources from socket-layer filter set.
  771  */
  772 static void
  773 imf_reap(struct in_mfilter *imf)
  774 {
  775         struct ip_msource       *ims, *tims;
  776         struct in_msource       *lims;
  777 
  778         RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
  779                 lims = (struct in_msource *)ims;
  780                 if ((lims->imsl_st[0] == MCAST_UNDEFINED) &&
  781                     (lims->imsl_st[1] == MCAST_UNDEFINED)) {
  782                         CTR2(KTR_IGMPV3, "%s: free lims %p", __func__, ims);
  783                         RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
  784                         free(ims, M_INMFILTER);
  785                         imf->imf_nsrc--;
  786                 }
  787         }
  788 }
  789 
  790 /*
  791  * Purge socket-layer filter set.
  792  */
  793 static void
  794 imf_purge(struct in_mfilter *imf)
  795 {
  796         struct ip_msource       *ims, *tims;
  797 
  798         RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
  799                 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
  800                 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
  801                 free(ims, M_INMFILTER);
  802                 imf->imf_nsrc--;
  803         }
  804         imf->imf_st[0] = imf->imf_st[1] = MCAST_UNDEFINED;
  805         KASSERT(RB_EMPTY(&imf->imf_sources),
  806             ("%s: imf_sources not empty", __func__));
  807 }
  808 
  809 /*
  810  * Look up a source filter entry for a multicast group.
  811  *
  812  * inm is the group descriptor to work with.
  813  * haddr is the host-byte-order IPv4 address to look up.
  814  * noalloc may be non-zero to suppress allocation of sources.
  815  * *pims will be set to the address of the retrieved or allocated source.
  816  *
  817  * SMPng: NOTE: may be called with locks held.
  818  * Return 0 if successful, otherwise return a non-zero error code.
  819  */
  820 static int
  821 inm_get_source(struct in_multi *inm, const in_addr_t haddr,
  822     const int noalloc, struct ip_msource **pims)
  823 {
  824         struct ip_msource        find;
  825         struct ip_msource       *ims, *nims;
  826 #ifdef KTR
  827         struct in_addr ia;
  828 #endif
  829 
  830         find.ims_haddr = haddr;
  831         ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
  832         if (ims == NULL && !noalloc) {
  833                 if (inm->inm_nsrc == in_mcast_maxgrpsrc)
  834                         return (ENOSPC);
  835                 nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE,
  836                     M_NOWAIT | M_ZERO);
  837                 if (nims == NULL)
  838                         return (ENOMEM);
  839                 nims->ims_haddr = haddr;
  840                 RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims);
  841                 ++inm->inm_nsrc;
  842                 ims = nims;
  843 #ifdef KTR
  844                 ia.s_addr = htonl(haddr);
  845                 CTR3(KTR_IGMPV3, "%s: allocated %s as %p", __func__,
  846                     inet_ntoa(ia), ims);
  847 #endif
  848         }
  849 
  850         *pims = ims;
  851         return (0);
  852 }
  853 
  854 /*
  855  * Merge socket-layer source into IGMP-layer source.
  856  * If rollback is non-zero, perform the inverse of the merge.
  857  */
  858 static void
  859 ims_merge(struct ip_msource *ims, const struct in_msource *lims,
  860     const int rollback)
  861 {
  862         int n = rollback ? -1 : 1;
  863 #ifdef KTR
  864         struct in_addr ia;
  865 
  866         ia.s_addr = htonl(ims->ims_haddr);
  867 #endif
  868 
  869         if (lims->imsl_st[0] == MCAST_EXCLUDE) {
  870                 CTR3(KTR_IGMPV3, "%s: t1 ex -= %d on %s",
  871                     __func__, n, inet_ntoa(ia));
  872                 ims->ims_st[1].ex -= n;
  873         } else if (lims->imsl_st[0] == MCAST_INCLUDE) {
  874                 CTR3(KTR_IGMPV3, "%s: t1 in -= %d on %s",
  875                     __func__, n, inet_ntoa(ia));
  876                 ims->ims_st[1].in -= n;
  877         }
  878 
  879         if (lims->imsl_st[1] == MCAST_EXCLUDE) {
  880                 CTR3(KTR_IGMPV3, "%s: t1 ex += %d on %s",
  881                     __func__, n, inet_ntoa(ia));
  882                 ims->ims_st[1].ex += n;
  883         } else if (lims->imsl_st[1] == MCAST_INCLUDE) {
  884                 CTR3(KTR_IGMPV3, "%s: t1 in += %d on %s",
  885                     __func__, n, inet_ntoa(ia));
  886                 ims->ims_st[1].in += n;
  887         }
  888 }
  889 
  890 /*
  891  * Atomically update the global in_multi state, when a membership's
  892  * filter list is being updated in any way.
  893  *
  894  * imf is the per-inpcb-membership group filter pointer.
  895  * A fake imf may be passed for in-kernel consumers.
  896  *
  897  * XXX This is a candidate for a set-symmetric-difference style loop
  898  * which would eliminate the repeated lookup from root of ims nodes,
  899  * as they share the same key space.
  900  *
  901  * If any error occurred this function will back out of refcounts
  902  * and return a non-zero value.
  903  */
  904 static int
  905 inm_merge(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
  906 {
  907         struct ip_msource       *ims, *nims;
  908         struct in_msource       *lims;
  909         int                      schanged, error;
  910         int                      nsrc0, nsrc1;
  911 
  912         schanged = 0;
  913         error = 0;
  914         nsrc1 = nsrc0 = 0;
  915 
  916         /*
  917          * Update the source filters first, as this may fail.
  918          * Maintain count of in-mode filters at t0, t1. These are
  919          * used to work out if we transition into ASM mode or not.
  920          * Maintain a count of source filters whose state was
  921          * actually modified by this operation.
  922          */
  923         RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
  924                 lims = (struct in_msource *)ims;
  925                 if (lims->imsl_st[0] == imf->imf_st[0]) nsrc0++;
  926                 if (lims->imsl_st[1] == imf->imf_st[1]) nsrc1++;
  927                 if (lims->imsl_st[0] == lims->imsl_st[1]) continue;
  928                 error = inm_get_source(inm, lims->ims_haddr, 0, &nims);
  929                 ++schanged;
  930                 if (error)
  931                         break;
  932                 ims_merge(nims, lims, 0);
  933         }
  934         if (error) {
  935                 struct ip_msource *bims;
  936 
  937                 RB_FOREACH_REVERSE_FROM(ims, ip_msource_tree, nims) {
  938                         lims = (struct in_msource *)ims;
  939                         if (lims->imsl_st[0] == lims->imsl_st[1])
  940                                 continue;
  941                         (void)inm_get_source(inm, lims->ims_haddr, 1, &bims);
  942                         if (bims == NULL)
  943                                 continue;
  944                         ims_merge(bims, lims, 1);
  945                 }
  946                 goto out_reap;
  947         }
  948 
  949         CTR3(KTR_IGMPV3, "%s: imf filters in-mode: %d at t0, %d at t1",
  950             __func__, nsrc0, nsrc1);
  951 
  952         /* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */
  953         if (imf->imf_st[0] == imf->imf_st[1] &&
  954             imf->imf_st[1] == MCAST_INCLUDE) {
  955                 if (nsrc1 == 0) {
  956                         CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__);
  957                         --inm->inm_st[1].iss_in;
  958                 }
  959         }
  960 
  961         /* Handle filter mode transition on socket. */
  962         if (imf->imf_st[0] != imf->imf_st[1]) {
  963                 CTR3(KTR_IGMPV3, "%s: imf transition %d to %d",
  964                     __func__, imf->imf_st[0], imf->imf_st[1]);
  965 
  966                 if (imf->imf_st[0] == MCAST_EXCLUDE) {
  967                         CTR1(KTR_IGMPV3, "%s: --ex on inm at t1", __func__);
  968                         --inm->inm_st[1].iss_ex;
  969                 } else if (imf->imf_st[0] == MCAST_INCLUDE) {
  970                         CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__);
  971                         --inm->inm_st[1].iss_in;
  972                 }
  973 
  974                 if (imf->imf_st[1] == MCAST_EXCLUDE) {
  975                         CTR1(KTR_IGMPV3, "%s: ex++ on inm at t1", __func__);
  976                         inm->inm_st[1].iss_ex++;
  977                 } else if (imf->imf_st[1] == MCAST_INCLUDE && nsrc1 > 0) {
  978                         CTR1(KTR_IGMPV3, "%s: in++ on inm at t1", __func__);
  979                         inm->inm_st[1].iss_in++;
  980                 }
  981         }
  982 
  983         /*
  984          * Track inm filter state in terms of listener counts.
  985          * If there are any exclusive listeners, stack-wide
  986          * membership is exclusive.
  987          * Otherwise, if only inclusive listeners, stack-wide is inclusive.
  988          * If no listeners remain, state is undefined at t1,
  989          * and the IGMP lifecycle for this group should finish.
  990          */
  991         if (inm->inm_st[1].iss_ex > 0) {
  992                 CTR1(KTR_IGMPV3, "%s: transition to EX", __func__);
  993                 inm->inm_st[1].iss_fmode = MCAST_EXCLUDE;
  994         } else if (inm->inm_st[1].iss_in > 0) {
  995                 CTR1(KTR_IGMPV3, "%s: transition to IN", __func__);
  996                 inm->inm_st[1].iss_fmode = MCAST_INCLUDE;
  997         } else {
  998                 CTR1(KTR_IGMPV3, "%s: transition to UNDEF", __func__);
  999                 inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
 1000         }
 1001 
 1002         /* Decrement ASM listener count on transition out of ASM mode. */
 1003         if (imf->imf_st[0] == MCAST_EXCLUDE && nsrc0 == 0) {
 1004                 if ((imf->imf_st[1] != MCAST_EXCLUDE) ||
 1005                     (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0))
 1006                         CTR1(KTR_IGMPV3, "%s: --asm on inm at t1", __func__);
 1007                         --inm->inm_st[1].iss_asm;
 1008         }
 1009 
 1010         /* Increment ASM listener count on transition to ASM mode. */
 1011         if (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 == 0) {
 1012                 CTR1(KTR_IGMPV3, "%s: asm++ on inm at t1", __func__);
 1013                 inm->inm_st[1].iss_asm++;
 1014         }
 1015 
 1016         CTR3(KTR_IGMPV3, "%s: merged imf %p to inm %p", __func__, imf, inm);
 1017         inm_print(inm);
 1018 
 1019 out_reap:
 1020         if (schanged > 0) {
 1021                 CTR1(KTR_IGMPV3, "%s: sources changed; reaping", __func__);
 1022                 inm_reap(inm);
 1023         }
 1024         return (error);
 1025 }
 1026 
 1027 /*
 1028  * Mark an in_multi's filter set deltas as committed.
 1029  * Called by IGMP after a state change has been enqueued.
 1030  */
 1031 void
 1032 inm_commit(struct in_multi *inm)
 1033 {
 1034         struct ip_msource       *ims;
 1035 
 1036         CTR2(KTR_IGMPV3, "%s: commit inm %p", __func__, inm);
 1037         CTR1(KTR_IGMPV3, "%s: pre commit:", __func__);
 1038         inm_print(inm);
 1039 
 1040         RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
 1041                 ims->ims_st[0] = ims->ims_st[1];
 1042         }
 1043         inm->inm_st[0] = inm->inm_st[1];
 1044 }
 1045 
 1046 /*
 1047  * Reap unreferenced nodes from an in_multi's filter set.
 1048  */
 1049 static void
 1050 inm_reap(struct in_multi *inm)
 1051 {
 1052         struct ip_msource       *ims, *tims;
 1053 
 1054         RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) {
 1055                 if (ims->ims_st[0].ex > 0 || ims->ims_st[0].in > 0 ||
 1056                     ims->ims_st[1].ex > 0 || ims->ims_st[1].in > 0 ||
 1057                     ims->ims_stp != 0)
 1058                         continue;
 1059                 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
 1060                 RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims);
 1061                 free(ims, M_IPMSOURCE);
 1062                 inm->inm_nsrc--;
 1063         }
 1064 }
 1065 
 1066 /*
 1067  * Purge all source nodes from an in_multi's filter set.
 1068  */
 1069 static void
 1070 inm_purge(struct in_multi *inm)
 1071 {
 1072         struct ip_msource       *ims, *tims;
 1073 
 1074         RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) {
 1075                 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
 1076                 RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims);
 1077                 free(ims, M_IPMSOURCE);
 1078                 inm->inm_nsrc--;
 1079         }
 1080 }
 1081 
 1082 /*
 1083  * Join a multicast group; unlocked entry point.
 1084  *
 1085  * SMPng: XXX: in_joingroup() is called from in_control() when Giant
 1086  * is not held. Fortunately, ifp is unlikely to have been detached
 1087  * at this point, so we assume it's OK to recurse.
 1088  */
 1089 int
 1090 in_joingroup(struct ifnet *ifp, const struct in_addr *gina,
 1091     /*const*/ struct in_mfilter *imf, struct in_multi **pinm)
 1092 {
 1093         int error;
 1094 
 1095         IN_MULTI_LOCK();
 1096         error = in_joingroup_locked(ifp, gina, imf, pinm);
 1097         IN_MULTI_UNLOCK();
 1098 
 1099         return (error);
 1100 }
 1101 
 1102 /*
 1103  * Join a multicast group; real entry point.
 1104  *
 1105  * Only preserves atomicity at inm level.
 1106  * NOTE: imf argument cannot be const due to sys/tree.h limitations.
 1107  *
 1108  * If the IGMP downcall fails, the group is not joined, and an error
 1109  * code is returned.
 1110  */
 1111 int
 1112 in_joingroup_locked(struct ifnet *ifp, const struct in_addr *gina,
 1113     /*const*/ struct in_mfilter *imf, struct in_multi **pinm)
 1114 {
 1115         struct in_mfilter        timf;
 1116         struct in_multi         *inm;
 1117         int                      error;
 1118 
 1119         IN_MULTI_LOCK_ASSERT();
 1120 
 1121         CTR4(KTR_IGMPV3, "%s: join %s on %p(%s))", __func__,
 1122             inet_ntoa(*gina), ifp, ifp->if_xname);
 1123 
 1124         error = 0;
 1125         inm = NULL;
 1126 
 1127         /*
 1128          * If no imf was specified (i.e. kernel consumer),
 1129          * fake one up and assume it is an ASM join.
 1130          */
 1131         if (imf == NULL) {
 1132                 imf_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE);
 1133                 imf = &timf;
 1134         }
 1135 
 1136         error = in_getmulti(ifp, gina, &inm);
 1137         if (error) {
 1138                 CTR1(KTR_IGMPV3, "%s: in_getmulti() failure", __func__);
 1139                 return (error);
 1140         }
 1141 
 1142         CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
 1143         error = inm_merge(inm, imf);
 1144         if (error) {
 1145                 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
 1146                 goto out_inm_release;
 1147         }
 1148 
 1149         CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
 1150         error = igmp_change_state(inm);
 1151         if (error) {
 1152                 CTR1(KTR_IGMPV3, "%s: failed to update source", __func__);
 1153                 goto out_inm_release;
 1154         }
 1155 
 1156 out_inm_release:
 1157         if (error) {
 1158                 CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm);
 1159                 inm_release_locked(inm);
 1160         } else {
 1161                 *pinm = inm;
 1162         }
 1163 
 1164         return (error);
 1165 }
 1166 
 1167 /*
 1168  * Leave a multicast group; unlocked entry point.
 1169  */
 1170 int
 1171 in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
 1172 {
 1173         struct ifnet *ifp;
 1174         int error;
 1175 
 1176         ifp = inm->inm_ifp;
 1177 
 1178         IN_MULTI_LOCK();
 1179         error = in_leavegroup_locked(inm, imf);
 1180         IN_MULTI_UNLOCK();
 1181 
 1182         return (error);
 1183 }
 1184 
 1185 /*
 1186  * Leave a multicast group; real entry point.
 1187  * All source filters will be expunged.
 1188  *
 1189  * Only preserves atomicity at inm level.
 1190  *
 1191  * Holding the write lock for the INP which contains imf
 1192  * is highly advisable. We can't assert for it as imf does not
 1193  * contain a back-pointer to the owning inp.
 1194  *
 1195  * Note: This is not the same as inm_release(*) as this function also
 1196  * makes a state change downcall into IGMP.
 1197  */
 1198 int
 1199 in_leavegroup_locked(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
 1200 {
 1201         struct in_mfilter        timf;
 1202         int                      error;
 1203 
 1204         error = 0;
 1205 
 1206         IN_MULTI_LOCK_ASSERT();
 1207 
 1208         CTR5(KTR_IGMPV3, "%s: leave inm %p, %s/%s, imf %p", __func__,
 1209             inm, inet_ntoa(inm->inm_addr),
 1210             (inm_is_ifp_detached(inm) ? "null" : inm->inm_ifp->if_xname),
 1211             imf);
 1212 
 1213         /*
 1214          * If no imf was specified (i.e. kernel consumer),
 1215          * fake one up and assume it is an ASM join.
 1216          */
 1217         if (imf == NULL) {
 1218                 imf_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED);
 1219                 imf = &timf;
 1220         }
 1221 
 1222         /*
 1223          * Begin state merge transaction at IGMP layer.
 1224          *
 1225          * As this particular invocation should not cause any memory
 1226          * to be allocated, and there is no opportunity to roll back
 1227          * the transaction, it MUST NOT fail.
 1228          */
 1229         CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
 1230         error = inm_merge(inm, imf);
 1231         KASSERT(error == 0, ("%s: failed to merge inm state", __func__));
 1232 
 1233         CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
 1234         error = igmp_change_state(inm);
 1235         if (error)
 1236                 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
 1237 
 1238         CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm);
 1239         inm_release_locked(inm);
 1240 
 1241         return (error);
 1242 }
 1243 
 1244 /*#ifndef BURN_BRIDGES*/
 1245 /*
 1246  * Join an IPv4 multicast group in (*,G) exclusive mode.
 1247  * The group must be a 224.0.0.0/24 link-scope group.
 1248  * This KPI is for legacy kernel consumers only.
 1249  */
 1250 struct in_multi *
 1251 in_addmulti(struct in_addr *ap, struct ifnet *ifp)
 1252 {
 1253         struct in_multi *pinm;
 1254         int error;
 1255 
 1256         KASSERT(IN_LOCAL_GROUP(ntohl(ap->s_addr)),
 1257             ("%s: %s not in 224.0.0.0/24", __func__, inet_ntoa(*ap)));
 1258 
 1259         error = in_joingroup(ifp, ap, NULL, &pinm);
 1260         if (error != 0)
 1261                 pinm = NULL;
 1262 
 1263         return (pinm);
 1264 }
 1265 
 1266 /*
 1267  * Leave an IPv4 multicast group, assumed to be in exclusive (*,G) mode.
 1268  * This KPI is for legacy kernel consumers only.
 1269  */
 1270 void
 1271 in_delmulti(struct in_multi *inm)
 1272 {
 1273 
 1274         (void)in_leavegroup(inm, NULL);
 1275 }
 1276 /*#endif*/
 1277 
 1278 /*
 1279  * Block or unblock an ASM multicast source on an inpcb.
 1280  * This implements the delta-based API described in RFC 3678.
 1281  *
 1282  * The delta-based API applies only to exclusive-mode memberships.
 1283  * An IGMP downcall will be performed.
 1284  *
 1285  * SMPng: NOTE: Must take Giant as a join may create a new ifma.
 1286  *
 1287  * Return 0 if successful, otherwise return an appropriate error code.
 1288  */
 1289 static int
 1290 inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt)
 1291 {
 1292         struct group_source_req          gsr;
 1293         sockunion_t                     *gsa, *ssa;
 1294         struct ifnet                    *ifp;
 1295         struct in_mfilter               *imf;
 1296         struct ip_moptions              *imo;
 1297         struct in_msource               *ims;
 1298         struct in_multi                 *inm;
 1299         size_t                           idx;
 1300         uint16_t                         fmode;
 1301         int                              error, doblock;
 1302 
 1303         ifp = NULL;
 1304         error = 0;
 1305         doblock = 0;
 1306 
 1307         memset(&gsr, 0, sizeof(struct group_source_req));
 1308         gsa = (sockunion_t *)&gsr.gsr_group;
 1309         ssa = (sockunion_t *)&gsr.gsr_source;
 1310 
 1311         switch (sopt->sopt_name) {
 1312         case IP_BLOCK_SOURCE:
 1313         case IP_UNBLOCK_SOURCE: {
 1314                 struct ip_mreq_source    mreqs;
 1315 
 1316                 error = sooptcopyin(sopt, &mreqs,
 1317                     sizeof(struct ip_mreq_source),
 1318                     sizeof(struct ip_mreq_source));
 1319                 if (error)
 1320                         return (error);
 1321 
 1322                 gsa->sin.sin_family = AF_INET;
 1323                 gsa->sin.sin_len = sizeof(struct sockaddr_in);
 1324                 gsa->sin.sin_addr = mreqs.imr_multiaddr;
 1325 
 1326                 ssa->sin.sin_family = AF_INET;
 1327                 ssa->sin.sin_len = sizeof(struct sockaddr_in);
 1328                 ssa->sin.sin_addr = mreqs.imr_sourceaddr;
 1329 
 1330                 if (!in_nullhost(mreqs.imr_interface))
 1331                         INADDR_TO_IFP(mreqs.imr_interface, ifp);
 1332 
 1333                 if (sopt->sopt_name == IP_BLOCK_SOURCE)
 1334                         doblock = 1;
 1335 
 1336                 CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p",
 1337                     __func__, inet_ntoa(mreqs.imr_interface), ifp);
 1338                 break;
 1339             }
 1340 
 1341         case MCAST_BLOCK_SOURCE:
 1342         case MCAST_UNBLOCK_SOURCE:
 1343                 error = sooptcopyin(sopt, &gsr,
 1344                     sizeof(struct group_source_req),
 1345                     sizeof(struct group_source_req));
 1346                 if (error)
 1347                         return (error);
 1348 
 1349                 if (gsa->sin.sin_family != AF_INET ||
 1350                     gsa->sin.sin_len != sizeof(struct sockaddr_in))
 1351                         return (EINVAL);
 1352 
 1353                 if (ssa->sin.sin_family != AF_INET ||
 1354                     ssa->sin.sin_len != sizeof(struct sockaddr_in))
 1355                         return (EINVAL);
 1356 
 1357                 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
 1358                         return (EADDRNOTAVAIL);
 1359 
 1360                 ifp = ifnet_byindex(gsr.gsr_interface);
 1361 
 1362                 if (sopt->sopt_name == MCAST_BLOCK_SOURCE)
 1363                         doblock = 1;
 1364                 break;
 1365 
 1366         default:
 1367                 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
 1368                     __func__, sopt->sopt_name);
 1369                 return (EOPNOTSUPP);
 1370                 break;
 1371         }
 1372 
 1373         if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
 1374                 return (EINVAL);
 1375 
 1376         /*
 1377          * Check if we are actually a member of this group.
 1378          */
 1379         imo = inp_findmoptions(inp);
 1380         idx = imo_match_group(imo, ifp, &gsa->sa);
 1381         if (idx == -1 || imo->imo_mfilters == NULL) {
 1382                 error = EADDRNOTAVAIL;
 1383                 goto out_inp_locked;
 1384         }
 1385 
 1386         KASSERT(imo->imo_mfilters != NULL,
 1387             ("%s: imo_mfilters not allocated", __func__));
 1388         imf = &imo->imo_mfilters[idx];
 1389         inm = imo->imo_membership[idx];
 1390 
 1391         /*
 1392          * Attempting to use the delta-based API on an
 1393          * non exclusive-mode membership is an error.
 1394          */
 1395         fmode = imf->imf_st[0];
 1396         if (fmode != MCAST_EXCLUDE) {
 1397                 error = EINVAL;
 1398                 goto out_inp_locked;
 1399         }
 1400 
 1401         /*
 1402          * Deal with error cases up-front:
 1403          *  Asked to block, but already blocked; or
 1404          *  Asked to unblock, but nothing to unblock.
 1405          * If adding a new block entry, allocate it.
 1406          */
 1407         ims = imo_match_source(imo, idx, &ssa->sa);
 1408         if ((ims != NULL && doblock) || (ims == NULL && !doblock)) {
 1409                 CTR3(KTR_IGMPV3, "%s: source %s %spresent", __func__,
 1410                     inet_ntoa(ssa->sin.sin_addr), doblock ? "" : "not ");
 1411                 error = EADDRNOTAVAIL;
 1412                 goto out_inp_locked;
 1413         }
 1414 
 1415         INP_WLOCK_ASSERT(inp);
 1416 
 1417         /*
 1418          * Begin state merge transaction at socket layer.
 1419          */
 1420         if (doblock) {
 1421                 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block");
 1422                 ims = imf_graft(imf, fmode, &ssa->sin);
 1423                 if (ims == NULL)
 1424                         error = ENOMEM;
 1425         } else {
 1426                 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow");
 1427                 error = imf_prune(imf, &ssa->sin);
 1428         }
 1429 
 1430         if (error) {
 1431                 CTR1(KTR_IGMPV3, "%s: merge imf state failed", __func__);
 1432                 goto out_imf_rollback;
 1433         }
 1434 
 1435         /*
 1436          * Begin state merge transaction at IGMP layer.
 1437          */
 1438         IN_MULTI_LOCK();
 1439 
 1440         CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
 1441         error = inm_merge(inm, imf);
 1442         if (error) {
 1443                 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
 1444                 goto out_imf_rollback;
 1445         }
 1446 
 1447         CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
 1448         error = igmp_change_state(inm);
 1449         if (error)
 1450                 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
 1451 
 1452         IN_MULTI_UNLOCK();
 1453 
 1454 out_imf_rollback:
 1455         if (error)
 1456                 imf_rollback(imf);
 1457         else
 1458                 imf_commit(imf);
 1459 
 1460         imf_reap(imf);
 1461 
 1462 out_inp_locked:
 1463         INP_WUNLOCK(inp);
 1464         return (error);
 1465 }
 1466 
 1467 /*
 1468  * Given an inpcb, return its multicast options structure pointer.  Accepts
 1469  * an unlocked inpcb pointer, but will return it locked.  May sleep.
 1470  *
 1471  * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held.
 1472  * SMPng: NOTE: Returns with the INP write lock held.
 1473  */
 1474 static struct ip_moptions *
 1475 inp_findmoptions(struct inpcb *inp)
 1476 {
 1477         struct ip_moptions       *imo;
 1478         struct in_multi         **immp;
 1479         struct in_mfilter        *imfp;
 1480         size_t                    idx;
 1481 
 1482         INP_WLOCK(inp);
 1483         if (inp->inp_moptions != NULL)
 1484                 return (inp->inp_moptions);
 1485 
 1486         INP_WUNLOCK(inp);
 1487 
 1488         imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK);
 1489         immp = malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS, M_IPMOPTS,
 1490             M_WAITOK | M_ZERO);
 1491         imfp = malloc(sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS,
 1492             M_INMFILTER, M_WAITOK);
 1493 
 1494         imo->imo_multicast_ifp = NULL;
 1495         imo->imo_multicast_addr.s_addr = INADDR_ANY;
 1496         imo->imo_multicast_vif = -1;
 1497         imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
 1498         imo->imo_multicast_loop = in_mcast_loop;
 1499         imo->imo_num_memberships = 0;
 1500         imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
 1501         imo->imo_membership = immp;
 1502 
 1503         /* Initialize per-group source filters. */
 1504         for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++)
 1505                 imf_init(&imfp[idx], MCAST_UNDEFINED, MCAST_EXCLUDE);
 1506         imo->imo_mfilters = imfp;
 1507 
 1508         INP_WLOCK(inp);
 1509         if (inp->inp_moptions != NULL) {
 1510                 free(imfp, M_INMFILTER);
 1511                 free(immp, M_IPMOPTS);
 1512                 free(imo, M_IPMOPTS);
 1513                 return (inp->inp_moptions);
 1514         }
 1515         inp->inp_moptions = imo;
 1516         return (imo);
 1517 }
 1518 
 1519 /*
 1520  * Discard the IP multicast options (and source filters).
 1521  *
 1522  * SMPng: NOTE: assumes INP write lock is held.
 1523  */
 1524 void
 1525 inp_freemoptions(struct ip_moptions *imo)
 1526 {
 1527         struct in_mfilter       *imf;
 1528         size_t                   idx, nmships;
 1529 
 1530         KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
 1531 
 1532         nmships = imo->imo_num_memberships;
 1533         for (idx = 0; idx < nmships; ++idx) {
 1534                 imf = imo->imo_mfilters ? &imo->imo_mfilters[idx] : NULL;
 1535                 if (imf)
 1536                         imf_leave(imf);
 1537                 (void)in_leavegroup(imo->imo_membership[idx], imf);
 1538                 if (imf)
 1539                         imf_purge(imf);
 1540         }
 1541 
 1542         if (imo->imo_mfilters)
 1543                 free(imo->imo_mfilters, M_INMFILTER);
 1544         free(imo->imo_membership, M_IPMOPTS);
 1545         free(imo, M_IPMOPTS);
 1546 }
 1547 
 1548 /*
 1549  * Atomically get source filters on a socket for an IPv4 multicast group.
 1550  * Called with INP lock held; returns with lock released.
 1551  */
 1552 static int
 1553 inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
 1554 {
 1555         struct __msfilterreq     msfr;
 1556         sockunion_t             *gsa;
 1557         struct ifnet            *ifp;
 1558         struct ip_moptions      *imo;
 1559         struct in_mfilter       *imf;
 1560         struct ip_msource       *ims;
 1561         struct in_msource       *lims;
 1562         struct sockaddr_in      *psin;
 1563         struct sockaddr_storage *ptss;
 1564         struct sockaddr_storage *tss;
 1565         int                      error;
 1566         size_t                   idx, nsrcs, ncsrcs;
 1567 
 1568         INP_WLOCK_ASSERT(inp);
 1569 
 1570         imo = inp->inp_moptions;
 1571         KASSERT(imo != NULL, ("%s: null ip_moptions", __func__));
 1572 
 1573         INP_WUNLOCK(inp);
 1574 
 1575         error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
 1576             sizeof(struct __msfilterreq));
 1577         if (error)
 1578                 return (error);
 1579 
 1580         if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
 1581                 return (EINVAL);
 1582 
 1583         ifp = ifnet_byindex(msfr.msfr_ifindex);
 1584         if (ifp == NULL)
 1585                 return (EINVAL);
 1586 
 1587         INP_WLOCK(inp);
 1588 
 1589         /*
 1590          * Lookup group on the socket.
 1591          */
 1592         gsa = (sockunion_t *)&msfr.msfr_group;
 1593         idx = imo_match_group(imo, ifp, &gsa->sa);
 1594         if (idx == -1 || imo->imo_mfilters == NULL) {
 1595                 INP_WUNLOCK(inp);
 1596                 return (EADDRNOTAVAIL);
 1597         }
 1598         imf = &imo->imo_mfilters[idx];
 1599 
 1600         /*
 1601          * Ignore memberships which are in limbo.
 1602          */
 1603         if (imf->imf_st[1] == MCAST_UNDEFINED) {
 1604                 INP_WUNLOCK(inp);
 1605                 return (EAGAIN);
 1606         }
 1607         msfr.msfr_fmode = imf->imf_st[1];
 1608 
 1609         /*
 1610          * If the user specified a buffer, copy out the source filter
 1611          * entries to userland gracefully.
 1612          * We only copy out the number of entries which userland
 1613          * has asked for, but we always tell userland how big the
 1614          * buffer really needs to be.
 1615          */
 1616         tss = NULL;
 1617         if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) {
 1618                 tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
 1619                     M_TEMP, M_NOWAIT | M_ZERO);
 1620                 if (tss == NULL) {
 1621                         INP_WUNLOCK(inp);
 1622                         return (ENOBUFS);
 1623                 }
 1624         }
 1625 
 1626         /*
 1627          * Count number of sources in-mode at t0.
 1628          * If buffer space exists and remains, copy out source entries.
 1629          */
 1630         nsrcs = msfr.msfr_nsrcs;
 1631         ncsrcs = 0;
 1632         ptss = tss;
 1633         RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
 1634                 lims = (struct in_msource *)ims;
 1635                 if (lims->imsl_st[0] == MCAST_UNDEFINED ||
 1636                     lims->imsl_st[0] != imf->imf_st[0])
 1637                         continue;
 1638                 ++ncsrcs;
 1639                 if (tss != NULL && nsrcs > 0) {
 1640                         psin = (struct sockaddr_in *)ptss;
 1641                         psin->sin_family = AF_INET;
 1642                         psin->sin_len = sizeof(struct sockaddr_in);
 1643                         psin->sin_addr.s_addr = htonl(lims->ims_haddr);
 1644                         psin->sin_port = 0;
 1645                         ++ptss;
 1646                         --nsrcs;
 1647                 }
 1648         }
 1649 
 1650         INP_WUNLOCK(inp);
 1651 
 1652         if (tss != NULL) {
 1653                 error = copyout(tss, msfr.msfr_srcs,
 1654                     sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
 1655                 free(tss, M_TEMP);
 1656                 if (error)
 1657                         return (error);
 1658         }
 1659 
 1660         msfr.msfr_nsrcs = ncsrcs;
 1661         error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq));
 1662 
 1663         return (error);
 1664 }
 1665 
 1666 /*
 1667  * Return the IP multicast options in response to user getsockopt().
 1668  */
 1669 int
 1670 inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
 1671 {
 1672         struct ip_mreqn          mreqn;
 1673         struct ip_moptions      *imo;
 1674         struct ifnet            *ifp;
 1675         struct in_ifaddr        *ia;
 1676         int                      error, optval;
 1677         u_char                   coptval;
 1678 
 1679         INP_WLOCK(inp);
 1680         imo = inp->inp_moptions;
 1681         /*
 1682          * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
 1683          * or is a divert socket, reject it.
 1684          */
 1685         if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
 1686             (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
 1687             inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) {
 1688                 INP_WUNLOCK(inp);
 1689                 return (EOPNOTSUPP);
 1690         }
 1691 
 1692         error = 0;
 1693         switch (sopt->sopt_name) {
 1694         case IP_MULTICAST_VIF:
 1695                 if (imo != NULL)
 1696                         optval = imo->imo_multicast_vif;
 1697                 else
 1698                         optval = -1;
 1699                 INP_WUNLOCK(inp);
 1700                 error = sooptcopyout(sopt, &optval, sizeof(int));
 1701                 break;
 1702 
 1703         case IP_MULTICAST_IF:
 1704                 memset(&mreqn, 0, sizeof(struct ip_mreqn));
 1705                 if (imo != NULL) {
 1706                         ifp = imo->imo_multicast_ifp;
 1707                         if (!in_nullhost(imo->imo_multicast_addr)) {
 1708                                 mreqn.imr_address = imo->imo_multicast_addr;
 1709                         } else if (ifp != NULL) {
 1710                                 mreqn.imr_ifindex = ifp->if_index;
 1711                                 IFP_TO_IA(ifp, ia);
 1712                                 if (ia != NULL) {
 1713                                         mreqn.imr_address =
 1714                                             IA_SIN(ia)->sin_addr;
 1715                                         ifa_free(&ia->ia_ifa);
 1716                                 }
 1717                         }
 1718                 }
 1719                 INP_WUNLOCK(inp);
 1720                 if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
 1721                         error = sooptcopyout(sopt, &mreqn,
 1722                             sizeof(struct ip_mreqn));
 1723                 } else {
 1724                         error = sooptcopyout(sopt, &mreqn.imr_address,
 1725                             sizeof(struct in_addr));
 1726                 }
 1727                 break;
 1728 
 1729         case IP_MULTICAST_TTL:
 1730                 if (imo == 0)
 1731                         optval = coptval = IP_DEFAULT_MULTICAST_TTL;
 1732                 else
 1733                         optval = coptval = imo->imo_multicast_ttl;
 1734                 INP_WUNLOCK(inp);
 1735                 if (sopt->sopt_valsize == sizeof(u_char))
 1736                         error = sooptcopyout(sopt, &coptval, sizeof(u_char));
 1737                 else
 1738                         error = sooptcopyout(sopt, &optval, sizeof(int));
 1739                 break;
 1740 
 1741         case IP_MULTICAST_LOOP:
 1742                 if (imo == 0)
 1743                         optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
 1744                 else
 1745                         optval = coptval = imo->imo_multicast_loop;
 1746                 INP_WUNLOCK(inp);
 1747                 if (sopt->sopt_valsize == sizeof(u_char))
 1748                         error = sooptcopyout(sopt, &coptval, sizeof(u_char));
 1749                 else
 1750                         error = sooptcopyout(sopt, &optval, sizeof(int));
 1751                 break;
 1752 
 1753         case IP_MSFILTER:
 1754                 if (imo == NULL) {
 1755                         error = EADDRNOTAVAIL;
 1756                         INP_WUNLOCK(inp);
 1757                 } else {
 1758                         error = inp_get_source_filters(inp, sopt);
 1759                 }
 1760                 break;
 1761 
 1762         default:
 1763                 INP_WUNLOCK(inp);
 1764                 error = ENOPROTOOPT;
 1765                 break;
 1766         }
 1767 
 1768         INP_UNLOCK_ASSERT(inp);
 1769 
 1770         return (error);
 1771 }
 1772 
 1773 /*
 1774  * Look up the ifnet to use for a multicast group membership,
 1775  * given the IPv4 address of an interface, and the IPv4 group address.
 1776  *
 1777  * This routine exists to support legacy multicast applications
 1778  * which do not understand that multicast memberships are scoped to
 1779  * specific physical links in the networking stack, or which need
 1780  * to join link-scope groups before IPv4 addresses are configured.
 1781  *
 1782  * If inp is non-NULL, use this socket's current FIB number for any
 1783  * required FIB lookup.
 1784  * If ina is INADDR_ANY, look up the group address in the unicast FIB,
 1785  * and use its ifp; usually, this points to the default next-hop.
 1786  *
 1787  * If the FIB lookup fails, attempt to use the first non-loopback
 1788  * interface with multicast capability in the system as a
 1789  * last resort. The legacy IPv4 ASM API requires that we do
 1790  * this in order to allow groups to be joined when the routing
 1791  * table has not yet been populated during boot.
 1792  *
 1793  * Returns NULL if no ifp could be found.
 1794  *
 1795  * SMPng: TODO: Acquire the appropriate locks for INADDR_TO_IFP.
 1796  * FUTURE: Implement IPv4 source-address selection.
 1797  */
 1798 static struct ifnet *
 1799 inp_lookup_mcast_ifp(const struct inpcb *inp,
 1800     const struct sockaddr_in *gsin, const struct in_addr ina)
 1801 {
 1802         struct ifnet *ifp;
 1803 
 1804         KASSERT(gsin->sin_family == AF_INET, ("%s: not AF_INET", __func__));
 1805         KASSERT(IN_MULTICAST(ntohl(gsin->sin_addr.s_addr)),
 1806             ("%s: not multicast", __func__));
 1807 
 1808         ifp = NULL;
 1809         if (!in_nullhost(ina)) {
 1810                 INADDR_TO_IFP(ina, ifp);
 1811         } else {
 1812                 struct route ro;
 1813 
 1814                 ro.ro_rt = NULL;
 1815                 memcpy(&ro.ro_dst, gsin, sizeof(struct sockaddr_in));
 1816                 in_rtalloc_ign(&ro, 0, inp ? inp->inp_inc.inc_fibnum : 0);
 1817                 if (ro.ro_rt != NULL) {
 1818                         ifp = ro.ro_rt->rt_ifp;
 1819                         KASSERT(ifp != NULL, ("%s: null ifp", __func__));
 1820                         RTFREE(ro.ro_rt);
 1821                 } else {
 1822                         struct in_ifaddr *ia;
 1823                         struct ifnet *mifp;
 1824 
 1825                         mifp = NULL;
 1826                         IN_IFADDR_RLOCK();
 1827                         TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
 1828                                 mifp = ia->ia_ifp;
 1829                                 if (!(mifp->if_flags & IFF_LOOPBACK) &&
 1830                                      (mifp->if_flags & IFF_MULTICAST)) {
 1831                                         ifp = mifp;
 1832                                         break;
 1833                                 }
 1834                         }
 1835                         IN_IFADDR_RUNLOCK();
 1836                 }
 1837         }
 1838 
 1839         return (ifp);
 1840 }
 1841 
 1842 /*
 1843  * Join an IPv4 multicast group, possibly with a source.
 1844  */
 1845 static int
 1846 inp_join_group(struct inpcb *inp, struct sockopt *sopt)
 1847 {
 1848         struct group_source_req          gsr;
 1849         sockunion_t                     *gsa, *ssa;
 1850         struct ifnet                    *ifp;
 1851         struct in_mfilter               *imf;
 1852         struct ip_moptions              *imo;
 1853         struct in_multi                 *inm;
 1854         struct in_msource               *lims;
 1855         size_t                           idx;
 1856         int                              error, is_new;
 1857 
 1858         ifp = NULL;
 1859         imf = NULL;
 1860         error = 0;
 1861         is_new = 0;
 1862 
 1863         memset(&gsr, 0, sizeof(struct group_source_req));
 1864         gsa = (sockunion_t *)&gsr.gsr_group;
 1865         gsa->ss.ss_family = AF_UNSPEC;
 1866         ssa = (sockunion_t *)&gsr.gsr_source;
 1867         ssa->ss.ss_family = AF_UNSPEC;
 1868 
 1869         switch (sopt->sopt_name) {
 1870         case IP_ADD_MEMBERSHIP:
 1871         case IP_ADD_SOURCE_MEMBERSHIP: {
 1872                 struct ip_mreq_source    mreqs;
 1873 
 1874                 if (sopt->sopt_name == IP_ADD_MEMBERSHIP) {
 1875                         error = sooptcopyin(sopt, &mreqs,
 1876                             sizeof(struct ip_mreq),
 1877                             sizeof(struct ip_mreq));
 1878                         /*
 1879                          * Do argument switcharoo from ip_mreq into
 1880                          * ip_mreq_source to avoid using two instances.
 1881                          */
 1882                         mreqs.imr_interface = mreqs.imr_sourceaddr;
 1883                         mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
 1884                 } else if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
 1885                         error = sooptcopyin(sopt, &mreqs,
 1886                             sizeof(struct ip_mreq_source),
 1887                             sizeof(struct ip_mreq_source));
 1888                 }
 1889                 if (error)
 1890                         return (error);
 1891 
 1892                 gsa->sin.sin_family = AF_INET;
 1893                 gsa->sin.sin_len = sizeof(struct sockaddr_in);
 1894                 gsa->sin.sin_addr = mreqs.imr_multiaddr;
 1895 
 1896                 if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
 1897                         ssa->sin.sin_family = AF_INET;
 1898                         ssa->sin.sin_len = sizeof(struct sockaddr_in);
 1899                         ssa->sin.sin_addr = mreqs.imr_sourceaddr;
 1900                 }
 1901 
 1902                 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
 1903                         return (EINVAL);
 1904 
 1905                 ifp = inp_lookup_mcast_ifp(inp, &gsa->sin,
 1906                     mreqs.imr_interface);
 1907                 CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p",
 1908                     __func__, inet_ntoa(mreqs.imr_interface), ifp);
 1909                 break;
 1910         }
 1911 
 1912         case MCAST_JOIN_GROUP:
 1913         case MCAST_JOIN_SOURCE_GROUP:
 1914                 if (sopt->sopt_name == MCAST_JOIN_GROUP) {
 1915                         error = sooptcopyin(sopt, &gsr,
 1916                             sizeof(struct group_req),
 1917                             sizeof(struct group_req));
 1918                 } else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
 1919                         error = sooptcopyin(sopt, &gsr,
 1920                             sizeof(struct group_source_req),
 1921                             sizeof(struct group_source_req));
 1922                 }
 1923                 if (error)
 1924                         return (error);
 1925 
 1926                 if (gsa->sin.sin_family != AF_INET ||
 1927                     gsa->sin.sin_len != sizeof(struct sockaddr_in))
 1928                         return (EINVAL);
 1929 
 1930                 /*
 1931                  * Overwrite the port field if present, as the sockaddr
 1932                  * being copied in may be matched with a binary comparison.
 1933                  */
 1934                 gsa->sin.sin_port = 0;
 1935                 if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
 1936                         if (ssa->sin.sin_family != AF_INET ||
 1937                             ssa->sin.sin_len != sizeof(struct sockaddr_in))
 1938                                 return (EINVAL);
 1939                         ssa->sin.sin_port = 0;
 1940                 }
 1941 
 1942                 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
 1943                         return (EINVAL);
 1944 
 1945                 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
 1946                         return (EADDRNOTAVAIL);
 1947                 ifp = ifnet_byindex(gsr.gsr_interface);
 1948                 break;
 1949 
 1950         default:
 1951                 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
 1952                     __func__, sopt->sopt_name);
 1953                 return (EOPNOTSUPP);
 1954                 break;
 1955         }
 1956 
 1957         if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
 1958                 return (EADDRNOTAVAIL);
 1959 
 1960         imo = inp_findmoptions(inp);
 1961         idx = imo_match_group(imo, ifp, &gsa->sa);
 1962         if (idx == -1) {
 1963                 is_new = 1;
 1964         } else {
 1965                 inm = imo->imo_membership[idx];
 1966                 imf = &imo->imo_mfilters[idx];
 1967                 if (ssa->ss.ss_family != AF_UNSPEC) {
 1968                         /*
 1969                          * MCAST_JOIN_SOURCE on an exclusive membership
 1970                          * is an error. On an existing inclusive membership,
 1971                          * it just adds the source to the filter list.
 1972                          */
 1973                         if (imf->imf_st[1] != MCAST_INCLUDE) {
 1974                                 error = EINVAL;
 1975                                 goto out_inp_locked;
 1976                         }
 1977                         /* Throw out duplicates. */
 1978                         lims = imo_match_source(imo, idx, &ssa->sa);
 1979                         if (lims != NULL) {
 1980                                 error = EADDRNOTAVAIL;
 1981                                 goto out_inp_locked;
 1982                         }
 1983                 } else {
 1984                         /*
 1985                          * MCAST_JOIN_GROUP on an existing inclusive
 1986                          * membership is an error; if you want to change
 1987                          * filter mode, you must use the userland API
 1988                          * setsourcefilter().
 1989                          */
 1990                         if (imf->imf_st[1] == MCAST_INCLUDE) {
 1991                                 error = EINVAL;
 1992                                 goto out_inp_locked;
 1993                         }
 1994                 }
 1995         }
 1996 
 1997         /*
 1998          * Begin state merge transaction at socket layer.
 1999          */
 2000         INP_WLOCK_ASSERT(inp);
 2001 
 2002         if (is_new) {
 2003                 if (imo->imo_num_memberships == imo->imo_max_memberships) {
 2004                         error = imo_grow(imo);
 2005                         if (error)
 2006                                 goto out_inp_locked;
 2007                 }
 2008                 /*
 2009                  * Allocate the new slot upfront so we can deal with
 2010                  * grafting the new source filter in same code path
 2011                  * as for join-source on existing membership.
 2012                  */
 2013                 idx = imo->imo_num_memberships;
 2014                 imo->imo_membership[idx] = NULL;
 2015                 imo->imo_num_memberships++;
 2016                 KASSERT(imo->imo_mfilters != NULL,
 2017                     ("%s: imf_mfilters vector was not allocated", __func__));
 2018                 imf = &imo->imo_mfilters[idx];
 2019                 KASSERT(RB_EMPTY(&imf->imf_sources),
 2020                     ("%s: imf_sources not empty", __func__));
 2021         }
 2022 
 2023         /*
 2024          * Graft new source into filter list for this inpcb's
 2025          * membership of the group. The in_multi may not have
 2026          * been allocated yet if this is a new membership, however,
 2027          * the in_mfilter slot will be allocated and must be initialized.
 2028          */
 2029         if (ssa->ss.ss_family != AF_UNSPEC) {
 2030                 /* Membership starts in IN mode */
 2031                 if (is_new) {
 2032                         CTR1(KTR_IGMPV3, "%s: new join w/source", __func__);
 2033                         imf_init(imf, MCAST_UNDEFINED, MCAST_INCLUDE);
 2034                 } else {
 2035                         CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow");
 2036                 }
 2037                 lims = imf_graft(imf, MCAST_INCLUDE, &ssa->sin);
 2038                 if (lims == NULL) {
 2039                         CTR1(KTR_IGMPV3, "%s: merge imf state failed",
 2040                             __func__);
 2041                         error = ENOMEM;
 2042                         goto out_imo_free;
 2043                 }
 2044         } else {
 2045                 /* No address specified; Membership starts in EX mode */
 2046                 if (is_new) {
 2047                         CTR1(KTR_IGMPV3, "%s: new join w/o source", __func__);
 2048                         imf_init(imf, MCAST_UNDEFINED, MCAST_EXCLUDE);
 2049                 }
 2050         }
 2051 
 2052         /*
 2053          * Begin state merge transaction at IGMP layer.
 2054          */
 2055         IN_MULTI_LOCK();
 2056 
 2057         if (is_new) {
 2058                 error = in_joingroup_locked(ifp, &gsa->sin.sin_addr, imf,
 2059                     &inm);
 2060                 if (error)
 2061                         goto out_imo_free;
 2062                 imo->imo_membership[idx] = inm;
 2063         } else {
 2064                 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
 2065                 error = inm_merge(inm, imf);
 2066                 if (error) {
 2067                         CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
 2068                             __func__);
 2069                         goto out_imf_rollback;
 2070                 }
 2071                 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
 2072                 error = igmp_change_state(inm);
 2073                 if (error) {
 2074                         CTR1(KTR_IGMPV3, "%s: failed igmp downcall",
 2075                             __func__);
 2076                         goto out_imf_rollback;
 2077                 }
 2078         }
 2079 
 2080         IN_MULTI_UNLOCK();
 2081 
 2082 out_imf_rollback:
 2083         INP_WLOCK_ASSERT(inp);
 2084         if (error) {
 2085                 imf_rollback(imf);
 2086                 if (is_new)
 2087                         imf_purge(imf);
 2088                 else
 2089                         imf_reap(imf);
 2090         } else {
 2091                 imf_commit(imf);
 2092         }
 2093 
 2094 out_imo_free:
 2095         if (error && is_new) {
 2096                 imo->imo_membership[idx] = NULL;
 2097                 --imo->imo_num_memberships;
 2098         }
 2099 
 2100 out_inp_locked:
 2101         INP_WUNLOCK(inp);
 2102         return (error);
 2103 }
 2104 
 2105 /*
 2106  * Leave an IPv4 multicast group on an inpcb, possibly with a source.
 2107  */
 2108 static int
 2109 inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
 2110 {
 2111         struct group_source_req          gsr;
 2112         struct ip_mreq_source            mreqs;
 2113         sockunion_t                     *gsa, *ssa;
 2114         struct ifnet                    *ifp;
 2115         struct in_mfilter               *imf;
 2116         struct ip_moptions              *imo;
 2117         struct in_msource               *ims;
 2118         struct in_multi                 *inm;
 2119         size_t                           idx;
 2120         int                              error, is_final;
 2121 
 2122         ifp = NULL;
 2123         error = 0;
 2124         is_final = 1;
 2125 
 2126         memset(&gsr, 0, sizeof(struct group_source_req));
 2127         gsa = (sockunion_t *)&gsr.gsr_group;
 2128         gsa->ss.ss_family = AF_UNSPEC;
 2129         ssa = (sockunion_t *)&gsr.gsr_source;
 2130         ssa->ss.ss_family = AF_UNSPEC;
 2131 
 2132         switch (sopt->sopt_name) {
 2133         case IP_DROP_MEMBERSHIP:
 2134         case IP_DROP_SOURCE_MEMBERSHIP:
 2135                 if (sopt->sopt_name == IP_DROP_MEMBERSHIP) {
 2136                         error = sooptcopyin(sopt, &mreqs,
 2137                             sizeof(struct ip_mreq),
 2138                             sizeof(struct ip_mreq));
 2139                         /*
 2140                          * Swap interface and sourceaddr arguments,
 2141                          * as ip_mreq and ip_mreq_source are laid
 2142                          * out differently.
 2143                          */
 2144                         mreqs.imr_interface = mreqs.imr_sourceaddr;
 2145                         mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
 2146                 } else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
 2147                         error = sooptcopyin(sopt, &mreqs,
 2148                             sizeof(struct ip_mreq_source),
 2149                             sizeof(struct ip_mreq_source));
 2150                 }
 2151                 if (error)
 2152                         return (error);
 2153 
 2154                 gsa->sin.sin_family = AF_INET;
 2155                 gsa->sin.sin_len = sizeof(struct sockaddr_in);
 2156                 gsa->sin.sin_addr = mreqs.imr_multiaddr;
 2157 
 2158                 if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
 2159                         ssa->sin.sin_family = AF_INET;
 2160                         ssa->sin.sin_len = sizeof(struct sockaddr_in);
 2161                         ssa->sin.sin_addr = mreqs.imr_sourceaddr;
 2162                 }
 2163 
 2164                 if (!in_nullhost(gsa->sin.sin_addr))
 2165                         INADDR_TO_IFP(mreqs.imr_interface, ifp);
 2166 
 2167                 CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p",
 2168                     __func__, inet_ntoa(mreqs.imr_interface), ifp);
 2169 
 2170                 break;
 2171 
 2172         case MCAST_LEAVE_GROUP:
 2173         case MCAST_LEAVE_SOURCE_GROUP:
 2174                 if (sopt->sopt_name == MCAST_LEAVE_GROUP) {
 2175                         error = sooptcopyin(sopt, &gsr,
 2176                             sizeof(struct group_req),
 2177                             sizeof(struct group_req));
 2178                 } else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
 2179                         error = sooptcopyin(sopt, &gsr,
 2180                             sizeof(struct group_source_req),
 2181                             sizeof(struct group_source_req));
 2182                 }
 2183                 if (error)
 2184                         return (error);
 2185 
 2186                 if (gsa->sin.sin_family != AF_INET ||
 2187                     gsa->sin.sin_len != sizeof(struct sockaddr_in))
 2188                         return (EINVAL);
 2189 
 2190                 if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
 2191                         if (ssa->sin.sin_family != AF_INET ||
 2192                             ssa->sin.sin_len != sizeof(struct sockaddr_in))
 2193                                 return (EINVAL);
 2194                 }
 2195 
 2196                 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
 2197                         return (EADDRNOTAVAIL);
 2198 
 2199                 ifp = ifnet_byindex(gsr.gsr_interface);
 2200                 break;
 2201 
 2202         default:
 2203                 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
 2204                     __func__, sopt->sopt_name);
 2205                 return (EOPNOTSUPP);
 2206                 break;
 2207         }
 2208 
 2209         if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
 2210                 return (EINVAL);
 2211 
 2212         if (ifp == NULL)
 2213                 return (EADDRNOTAVAIL);
 2214 
 2215         /*
 2216          * Find the membership in the membership array.
 2217          */
 2218         imo = inp_findmoptions(inp);
 2219         idx = imo_match_group(imo, ifp, &gsa->sa);
 2220         if (idx == -1) {
 2221                 error = EADDRNOTAVAIL;
 2222                 goto out_inp_locked;
 2223         }
 2224         inm = imo->imo_membership[idx];
 2225         imf = &imo->imo_mfilters[idx];
 2226 
 2227         if (ssa->ss.ss_family != AF_UNSPEC)
 2228                 is_final = 0;
 2229 
 2230         /*
 2231          * Begin state merge transaction at socket layer.
 2232          */
 2233         INP_WLOCK_ASSERT(inp);
 2234 
 2235         /*
 2236          * If we were instructed only to leave a given source, do so.
 2237          * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships.
 2238          */
 2239         if (is_final) {
 2240                 imf_leave(imf);
 2241         } else {
 2242                 if (imf->imf_st[0] == MCAST_EXCLUDE) {
 2243                         error = EADDRNOTAVAIL;
 2244                         goto out_inp_locked;
 2245                 }
 2246                 ims = imo_match_source(imo, idx, &ssa->sa);
 2247                 if (ims == NULL) {
 2248                         CTR3(KTR_IGMPV3, "%s: source %s %spresent", __func__,
 2249                             inet_ntoa(ssa->sin.sin_addr), "not ");
 2250                         error = EADDRNOTAVAIL;
 2251                         goto out_inp_locked;
 2252                 }
 2253                 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block");
 2254                 error = imf_prune(imf, &ssa->sin);
 2255                 if (error) {
 2256                         CTR1(KTR_IGMPV3, "%s: merge imf state failed",
 2257                             __func__);
 2258                         goto out_inp_locked;
 2259                 }
 2260         }
 2261 
 2262         /*
 2263          * Begin state merge transaction at IGMP layer.
 2264          */
 2265         IN_MULTI_LOCK();
 2266 
 2267         if (is_final) {
 2268                 /*
 2269                  * Give up the multicast address record to which
 2270                  * the membership points.
 2271                  */
 2272                 (void)in_leavegroup_locked(inm, imf);
 2273         } else {
 2274                 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
 2275                 error = inm_merge(inm, imf);
 2276                 if (error) {
 2277                         CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
 2278                             __func__);
 2279                         goto out_imf_rollback;
 2280                 }
 2281 
 2282                 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
 2283                 error = igmp_change_state(inm);
 2284                 if (error) {
 2285                         CTR1(KTR_IGMPV3, "%s: failed igmp downcall",
 2286                             __func__);
 2287                 }
 2288         }
 2289 
 2290         IN_MULTI_UNLOCK();
 2291 
 2292 out_imf_rollback:
 2293         if (error)
 2294                 imf_rollback(imf);
 2295         else
 2296                 imf_commit(imf);
 2297 
 2298         imf_reap(imf);
 2299 
 2300         if (is_final) {
 2301                 /* Remove the gap in the membership and filter array. */
 2302                 for (++idx; idx < imo->imo_num_memberships; ++idx) {
 2303                         imo->imo_membership[idx-1] = imo->imo_membership[idx];
 2304                         imo->imo_mfilters[idx-1] = imo->imo_mfilters[idx];
 2305                 }
 2306                 imo->imo_num_memberships--;
 2307         }
 2308 
 2309 out_inp_locked:
 2310         INP_WUNLOCK(inp);
 2311         return (error);
 2312 }
 2313 
 2314 /*
 2315  * Select the interface for transmitting IPv4 multicast datagrams.
 2316  *
 2317  * Either an instance of struct in_addr or an instance of struct ip_mreqn
 2318  * may be passed to this socket option. An address of INADDR_ANY or an
 2319  * interface index of 0 is used to remove a previous selection.
 2320  * When no interface is selected, one is chosen for every send.
 2321  */
 2322 static int
 2323 inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt)
 2324 {
 2325         struct in_addr           addr;
 2326         struct ip_mreqn          mreqn;
 2327         struct ifnet            *ifp;
 2328         struct ip_moptions      *imo;
 2329         int                      error;
 2330 
 2331         if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
 2332                 /*
 2333                  * An interface index was specified using the
 2334                  * Linux-derived ip_mreqn structure.
 2335                  */
 2336                 error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn),
 2337                     sizeof(struct ip_mreqn));
 2338                 if (error)
 2339                         return (error);
 2340 
 2341                 if (mreqn.imr_ifindex < 0 || V_if_index < mreqn.imr_ifindex)
 2342                         return (EINVAL);
 2343 
 2344                 if (mreqn.imr_ifindex == 0) {
 2345                         ifp = NULL;
 2346                 } else {
 2347                         ifp = ifnet_byindex(mreqn.imr_ifindex);
 2348                         if (ifp == NULL)
 2349                                 return (EADDRNOTAVAIL);
 2350                 }
 2351         } else {
 2352                 /*
 2353                  * An interface was specified by IPv4 address.
 2354                  * This is the traditional BSD usage.
 2355                  */
 2356                 error = sooptcopyin(sopt, &addr, sizeof(struct in_addr),
 2357                     sizeof(struct in_addr));
 2358                 if (error)
 2359                         return (error);
 2360                 if (in_nullhost(addr)) {
 2361                         ifp = NULL;
 2362                 } else {
 2363                         INADDR_TO_IFP(addr, ifp);
 2364                         if (ifp == NULL)
 2365                                 return (EADDRNOTAVAIL);
 2366                 }
 2367                 CTR3(KTR_IGMPV3, "%s: ifp = %p, addr = %s", __func__, ifp,
 2368                     inet_ntoa(addr));
 2369         }
 2370 
 2371         /* Reject interfaces which do not support multicast. */
 2372         if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0)
 2373                 return (EOPNOTSUPP);
 2374 
 2375         imo = inp_findmoptions(inp);
 2376         imo->imo_multicast_ifp = ifp;
 2377         imo->imo_multicast_addr.s_addr = INADDR_ANY;
 2378         INP_WUNLOCK(inp);
 2379 
 2380         return (0);
 2381 }
 2382 
 2383 /*
 2384  * Atomically set source filters on a socket for an IPv4 multicast group.
 2385  *
 2386  * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held.
 2387  */
 2388 static int
 2389 inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
 2390 {
 2391         struct __msfilterreq     msfr;
 2392         sockunion_t             *gsa;
 2393         struct ifnet            *ifp;
 2394         struct in_mfilter       *imf;
 2395         struct ip_moptions      *imo;
 2396         struct in_multi         *inm;
 2397         size_t                   idx;
 2398         int                      error;
 2399 
 2400         error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
 2401             sizeof(struct __msfilterreq));
 2402         if (error)
 2403                 return (error);
 2404 
 2405         if (msfr.msfr_nsrcs > in_mcast_maxsocksrc ||
 2406             (msfr.msfr_fmode != MCAST_EXCLUDE &&
 2407              msfr.msfr_fmode != MCAST_INCLUDE))
 2408                 return (EINVAL);
 2409 
 2410         if (msfr.msfr_group.ss_family != AF_INET ||
 2411             msfr.msfr_group.ss_len != sizeof(struct sockaddr_in))
 2412                 return (EINVAL);
 2413 
 2414         gsa = (sockunion_t *)&msfr.msfr_group;
 2415         if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
 2416                 return (EINVAL);
 2417 
 2418         gsa->sin.sin_port = 0;  /* ignore port */
 2419 
 2420         if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
 2421                 return (EADDRNOTAVAIL);
 2422 
 2423         ifp = ifnet_byindex(msfr.msfr_ifindex);
 2424         if (ifp == NULL)
 2425                 return (EADDRNOTAVAIL);
 2426 
 2427         /*
 2428          * Take the INP write lock.
 2429          * Check if this socket is a member of this group.
 2430          */
 2431         imo = inp_findmoptions(inp);
 2432         idx = imo_match_group(imo, ifp, &gsa->sa);
 2433         if (idx == -1 || imo->imo_mfilters == NULL) {
 2434                 error = EADDRNOTAVAIL;
 2435                 goto out_inp_locked;
 2436         }
 2437         inm = imo->imo_membership[idx];
 2438         imf = &imo->imo_mfilters[idx];
 2439 
 2440         /*
 2441          * Begin state merge transaction at socket layer.
 2442          */
 2443         INP_WLOCK_ASSERT(inp);
 2444 
 2445         imf->imf_st[1] = msfr.msfr_fmode;
 2446 
 2447         /*
 2448          * Apply any new source filters, if present.
 2449          * Make a copy of the user-space source vector so
 2450          * that we may copy them with a single copyin. This
 2451          * allows us to deal with page faults up-front.
 2452          */
 2453         if (msfr.msfr_nsrcs > 0) {
 2454                 struct in_msource       *lims;
 2455                 struct sockaddr_in      *psin;
 2456                 struct sockaddr_storage *kss, *pkss;
 2457                 int                      i;
 2458 
 2459                 INP_WUNLOCK(inp);
 2460  
 2461                 CTR2(KTR_IGMPV3, "%s: loading %lu source list entries",
 2462                     __func__, (unsigned long)msfr.msfr_nsrcs);
 2463                 kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
 2464                     M_TEMP, M_WAITOK);
 2465                 error = copyin(msfr.msfr_srcs, kss,
 2466                     sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
 2467                 if (error) {
 2468                         free(kss, M_TEMP);
 2469                         return (error);
 2470                 }
 2471 
 2472                 INP_WLOCK(inp);
 2473 
 2474                 /*
 2475                  * Mark all source filters as UNDEFINED at t1.
 2476                  * Restore new group filter mode, as imf_leave()
 2477                  * will set it to INCLUDE.
 2478                  */
 2479                 imf_leave(imf);
 2480                 imf->imf_st[1] = msfr.msfr_fmode;
 2481 
 2482                 /*
 2483                  * Update socket layer filters at t1, lazy-allocating
 2484                  * new entries. This saves a bunch of memory at the
 2485                  * cost of one RB_FIND() per source entry; duplicate
 2486                  * entries in the msfr_nsrcs vector are ignored.
 2487                  * If we encounter an error, rollback transaction.
 2488                  *
 2489                  * XXX This too could be replaced with a set-symmetric
 2490                  * difference like loop to avoid walking from root
 2491                  * every time, as the key space is common.
 2492                  */
 2493                 for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) {
 2494                         psin = (struct sockaddr_in *)pkss;
 2495                         if (psin->sin_family != AF_INET) {
 2496                                 error = EAFNOSUPPORT;
 2497                                 break;
 2498                         }
 2499                         if (psin->sin_len != sizeof(struct sockaddr_in)) {
 2500                                 error = EINVAL;
 2501                                 break;
 2502                         }
 2503                         error = imf_get_source(imf, psin, &lims);
 2504                         if (error)
 2505                                 break;
 2506                         lims->imsl_st[1] = imf->imf_st[1];
 2507                 }
 2508                 free(kss, M_TEMP);
 2509         }
 2510 
 2511         if (error)
 2512                 goto out_imf_rollback;
 2513 
 2514         INP_WLOCK_ASSERT(inp);
 2515         IN_MULTI_LOCK();
 2516 
 2517         /*
 2518          * Begin state merge transaction at IGMP layer.
 2519          */
 2520         CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
 2521         error = inm_merge(inm, imf);
 2522         if (error) {
 2523                 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
 2524                 goto out_imf_rollback;
 2525         }
 2526 
 2527         CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
 2528         error = igmp_change_state(inm);
 2529         if (error)
 2530                 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
 2531 
 2532         IN_MULTI_UNLOCK();
 2533 
 2534 out_imf_rollback:
 2535         if (error)
 2536                 imf_rollback(imf);
 2537         else
 2538                 imf_commit(imf);
 2539 
 2540         imf_reap(imf);
 2541 
 2542 out_inp_locked:
 2543         INP_WUNLOCK(inp);
 2544         return (error);
 2545 }
 2546 
 2547 /*
 2548  * Set the IP multicast options in response to user setsockopt().
 2549  *
 2550  * Many of the socket options handled in this function duplicate the
 2551  * functionality of socket options in the regular unicast API. However,
 2552  * it is not possible to merge the duplicate code, because the idempotence
 2553  * of the IPv4 multicast part of the BSD Sockets API must be preserved;
 2554  * the effects of these options must be treated as separate and distinct.
 2555  *
 2556  * SMPng: XXX: Unlocked read of inp_socket believed OK.
 2557  * FUTURE: The IP_MULTICAST_VIF option may be eliminated if MROUTING
 2558  * is refactored to no longer use vifs.
 2559  */
 2560 int
 2561 inp_setmoptions(struct inpcb *inp, struct sockopt *sopt)
 2562 {
 2563         struct ip_moptions      *imo;
 2564         int                      error;
 2565 
 2566         error = 0;
 2567 
 2568         /*
 2569          * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
 2570          * or is a divert socket, reject it.
 2571          */
 2572         if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
 2573             (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
 2574              inp->inp_socket->so_proto->pr_type != SOCK_DGRAM))
 2575                 return (EOPNOTSUPP);
 2576 
 2577         switch (sopt->sopt_name) {
 2578         case IP_MULTICAST_VIF: {
 2579                 int vifi;
 2580                 /*
 2581                  * Select a multicast VIF for transmission.
 2582                  * Only useful if multicast forwarding is active.
 2583                  */
 2584                 if (legal_vif_num == NULL) {
 2585                         error = EOPNOTSUPP;
 2586                         break;
 2587                 }
 2588                 error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int));
 2589                 if (error)
 2590                         break;
 2591                 if (!legal_vif_num(vifi) && (vifi != -1)) {
 2592                         error = EINVAL;
 2593                         break;
 2594                 }
 2595                 imo = inp_findmoptions(inp);
 2596                 imo->imo_multicast_vif = vifi;
 2597                 INP_WUNLOCK(inp);
 2598                 break;
 2599         }
 2600 
 2601         case IP_MULTICAST_IF:
 2602                 error = inp_set_multicast_if(inp, sopt);
 2603                 break;
 2604 
 2605         case IP_MULTICAST_TTL: {
 2606                 u_char ttl;
 2607 
 2608                 /*
 2609                  * Set the IP time-to-live for outgoing multicast packets.
 2610                  * The original multicast API required a char argument,
 2611                  * which is inconsistent with the rest of the socket API.
 2612                  * We allow either a char or an int.
 2613                  */
 2614                 if (sopt->sopt_valsize == sizeof(u_char)) {
 2615                         error = sooptcopyin(sopt, &ttl, sizeof(u_char),
 2616                             sizeof(u_char));
 2617                         if (error)
 2618                                 break;
 2619                 } else {
 2620                         u_int ittl;
 2621 
 2622                         error = sooptcopyin(sopt, &ittl, sizeof(u_int),
 2623                             sizeof(u_int));
 2624                         if (error)
 2625                                 break;
 2626                         if (ittl > 255) {
 2627                                 error = EINVAL;
 2628                                 break;
 2629                         }
 2630                         ttl = (u_char)ittl;
 2631                 }
 2632                 imo = inp_findmoptions(inp);
 2633                 imo->imo_multicast_ttl = ttl;
 2634                 INP_WUNLOCK(inp);
 2635                 break;
 2636         }
 2637 
 2638         case IP_MULTICAST_LOOP: {
 2639                 u_char loop;
 2640 
 2641                 /*
 2642                  * Set the loopback flag for outgoing multicast packets.
 2643                  * Must be zero or one.  The original multicast API required a
 2644                  * char argument, which is inconsistent with the rest
 2645                  * of the socket API.  We allow either a char or an int.
 2646                  */
 2647                 if (sopt->sopt_valsize == sizeof(u_char)) {
 2648                         error = sooptcopyin(sopt, &loop, sizeof(u_char),
 2649                             sizeof(u_char));
 2650                         if (error)
 2651                                 break;
 2652                 } else {
 2653                         u_int iloop;
 2654 
 2655                         error = sooptcopyin(sopt, &iloop, sizeof(u_int),
 2656                                             sizeof(u_int));
 2657                         if (error)
 2658                                 break;
 2659                         loop = (u_char)iloop;
 2660                 }
 2661                 imo = inp_findmoptions(inp);
 2662                 imo->imo_multicast_loop = !!loop;
 2663                 INP_WUNLOCK(inp);
 2664                 break;
 2665         }
 2666 
 2667         case IP_ADD_MEMBERSHIP:
 2668         case IP_ADD_SOURCE_MEMBERSHIP:
 2669         case MCAST_JOIN_GROUP:
 2670         case MCAST_JOIN_SOURCE_GROUP:
 2671                 error = inp_join_group(inp, sopt);
 2672                 break;
 2673 
 2674         case IP_DROP_MEMBERSHIP:
 2675         case IP_DROP_SOURCE_MEMBERSHIP:
 2676         case MCAST_LEAVE_GROUP:
 2677         case MCAST_LEAVE_SOURCE_GROUP:
 2678                 error = inp_leave_group(inp, sopt);
 2679                 break;
 2680 
 2681         case IP_BLOCK_SOURCE:
 2682         case IP_UNBLOCK_SOURCE:
 2683         case MCAST_BLOCK_SOURCE:
 2684         case MCAST_UNBLOCK_SOURCE:
 2685                 error = inp_block_unblock_source(inp, sopt);
 2686                 break;
 2687 
 2688         case IP_MSFILTER:
 2689                 error = inp_set_source_filters(inp, sopt);
 2690                 break;
 2691 
 2692         default:
 2693                 error = EOPNOTSUPP;
 2694                 break;
 2695         }
 2696 
 2697         INP_UNLOCK_ASSERT(inp);
 2698 
 2699         return (error);
 2700 }
 2701 
 2702 /*
 2703  * Expose IGMP's multicast filter mode and source list(s) to userland,
 2704  * keyed by (ifindex, group).
 2705  * The filter mode is written out as a uint32_t, followed by
 2706  * 0..n of struct in_addr.
 2707  * For use by ifmcstat(8).
 2708  * SMPng: NOTE: unlocked read of ifindex space.
 2709  */
 2710 static int
 2711 sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS)
 2712 {
 2713         struct in_addr                   src, group;
 2714         struct ifnet                    *ifp;
 2715         struct ifmultiaddr              *ifma;
 2716         struct in_multi                 *inm;
 2717         struct ip_msource               *ims;
 2718         int                             *name;
 2719         int                              retval;
 2720         u_int                            namelen;
 2721         uint32_t                         fmode, ifindex;
 2722 
 2723         name = (int *)arg1;
 2724         namelen = arg2;
 2725 
 2726         if (req->newptr != NULL)
 2727                 return (EPERM);
 2728 
 2729         if (namelen != 2)
 2730                 return (EINVAL);
 2731 
 2732         ifindex = name[0];
 2733         if (ifindex <= 0 || ifindex > V_if_index) {
 2734                 CTR2(KTR_IGMPV3, "%s: ifindex %u out of range",
 2735                     __func__, ifindex);
 2736                 return (ENOENT);
 2737         }
 2738 
 2739         group.s_addr = name[1];
 2740         if (!IN_MULTICAST(ntohl(group.s_addr))) {
 2741                 CTR2(KTR_IGMPV3, "%s: group %s is not multicast",
 2742                     __func__, inet_ntoa(group));
 2743                 return (EINVAL);
 2744         }
 2745 
 2746         ifp = ifnet_byindex(ifindex);
 2747         if (ifp == NULL) {
 2748                 CTR2(KTR_IGMPV3, "%s: no ifp for ifindex %u",
 2749                     __func__, ifindex);
 2750                 return (ENOENT);
 2751         }
 2752 
 2753         retval = sysctl_wire_old_buffer(req,
 2754             sizeof(uint32_t) + (in_mcast_maxgrpsrc * sizeof(struct in_addr)));
 2755         if (retval)
 2756                 return (retval);
 2757 
 2758         IN_MULTI_LOCK();
 2759 
 2760         IF_ADDR_LOCK(ifp);
 2761         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 2762                 if (ifma->ifma_addr->sa_family != AF_INET ||
 2763                     ifma->ifma_protospec == NULL)
 2764                         continue;
 2765                 inm = (struct in_multi *)ifma->ifma_protospec;
 2766                 if (!in_hosteq(inm->inm_addr, group))
 2767                         continue;
 2768                 fmode = inm->inm_st[1].iss_fmode;
 2769                 retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t));
 2770                 if (retval != 0)
 2771                         break;
 2772                 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
 2773 #ifdef KTR
 2774                         struct in_addr ina;
 2775                         ina.s_addr = htonl(ims->ims_haddr);
 2776                         CTR2(KTR_IGMPV3, "%s: visit node %s", __func__,
 2777                             inet_ntoa(ina));
 2778 #endif
 2779                         /*
 2780                          * Only copy-out sources which are in-mode.
 2781                          */
 2782                         if (fmode != ims_get_mode(inm, ims, 1)) {
 2783                                 CTR1(KTR_IGMPV3, "%s: skip non-in-mode",
 2784                                     __func__);
 2785                                 continue;
 2786                         }
 2787                         src.s_addr = htonl(ims->ims_haddr);
 2788                         retval = SYSCTL_OUT(req, &src, sizeof(struct in_addr));
 2789                         if (retval != 0)
 2790                                 break;
 2791                 }
 2792         }
 2793         IF_ADDR_UNLOCK(ifp);
 2794 
 2795         IN_MULTI_UNLOCK();
 2796 
 2797         return (retval);
 2798 }
 2799 
 2800 #ifdef KTR
 2801 
 2802 static const char *inm_modestrs[] = { "un", "in", "ex" };
 2803 
 2804 static const char *
 2805 inm_mode_str(const int mode)
 2806 {
 2807 
 2808         if (mode >= MCAST_UNDEFINED && mode <= MCAST_EXCLUDE)
 2809                 return (inm_modestrs[mode]);
 2810         return ("??");
 2811 }
 2812 
 2813 static const char *inm_statestrs[] = {
 2814         "not-member",
 2815         "silent",
 2816         "idle",
 2817         "lazy",
 2818         "sleeping",
 2819         "awakening",
 2820         "query-pending",
 2821         "sg-query-pending",
 2822         "leaving"
 2823 };
 2824 
 2825 static const char *
 2826 inm_state_str(const int state)
 2827 {
 2828 
 2829         if (state >= IGMP_NOT_MEMBER && state <= IGMP_LEAVING_MEMBER)
 2830                 return (inm_statestrs[state]);
 2831         return ("??");
 2832 }
 2833 
 2834 /*
 2835  * Dump an in_multi structure to the console.
 2836  */
 2837 void
 2838 inm_print(const struct in_multi *inm)
 2839 {
 2840         int t;
 2841 
 2842         if ((ktr_mask & KTR_IGMPV3) == 0)
 2843                 return;
 2844 
 2845         printf("%s: --- begin inm %p ---\n", __func__, inm);
 2846         printf("addr %s ifp %p(%s) ifma %p\n",
 2847             inet_ntoa(inm->inm_addr),
 2848             inm->inm_ifp,
 2849             inm->inm_ifp->if_xname,
 2850             inm->inm_ifma);
 2851         printf("timer %u state %s refcount %u scq.len %u\n",
 2852             inm->inm_timer,
 2853             inm_state_str(inm->inm_state),
 2854             inm->inm_refcount,
 2855             inm->inm_scq.ifq_len);
 2856         printf("igi %p nsrc %lu sctimer %u scrv %u\n",
 2857             inm->inm_igi,
 2858             inm->inm_nsrc,
 2859             inm->inm_sctimer,
 2860             inm->inm_scrv);
 2861         for (t = 0; t < 2; t++) {
 2862                 printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", t,
 2863                     inm_mode_str(inm->inm_st[t].iss_fmode),
 2864                     inm->inm_st[t].iss_asm,
 2865                     inm->inm_st[t].iss_ex,
 2866                     inm->inm_st[t].iss_in,
 2867                     inm->inm_st[t].iss_rec);
 2868         }
 2869         printf("%s: --- end inm %p ---\n", __func__, inm);
 2870 }
 2871 
 2872 #else /* !KTR */
 2873 
 2874 void
 2875 inm_print(const struct in_multi *inm)
 2876 {
 2877 
 2878 }
 2879 
 2880 #endif /* KTR */
 2881 
/*
 * Instantiate the red-black tree routines for ip_msource_tree: nodes are
 * struct ip_msource, linked through ims_link and ordered by
 * ip_msource_cmp().  These back the RB_FOREACH() walks over inm_srcs
 * elsewhere in this file.
 */
RB_GENERATE(ip_msource_tree, ip_msource, ims_link, ip_msource_cmp);

Cache object: 519b7dc0cd0d68a6421646a499997543


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.