The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/in_pcbgroup.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2010-2011 Juniper Networks, Inc.
    5  * All rights reserved.
    6  *
    7  * This software was developed by Robert N. M. Watson under contract
    8  * to Juniper Networks, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   29  * SUCH DAMAGE.
   30  */
   31 
   32 #include <sys/cdefs.h>
   33 
   34 __FBSDID("$FreeBSD: head/sys/netinet/in_pcbgroup.c 335093 2018-06-13 23:19:54Z mmacy $");
   35 
   36 #include "opt_inet6.h"
   37 #include "opt_rss.h"
   38 
   39 #include <sys/param.h>
   40 #include <sys/lock.h>
   41 #include <sys/malloc.h>
   42 #include <sys/mbuf.h>
   43 #include <sys/mutex.h>
   44 #include <sys/smp.h>
   45 #include <sys/socket.h>
   46 #include <sys/socketvar.h>
   47 
   48 #include <net/rss_config.h>
   49 
   50 #include <netinet/in.h>
   51 
   52 #include <netinet/in_pcb.h>
   53 #include <netinet/in_rss.h>
   54 #ifdef INET6
   55 #include <netinet6/in6_pcb.h>
   56 #endif /* INET6 */
   57 
   58 /*
   59  * pcbgroups, or "connection groups" are based on Willman, Rixner, and Cox's
   60  * 2006 USENIX paper, "An Evaluation of Network Stack Parallelization
   61  * Strategies in Modern Operating Systems".  This implementation differs
   62  * significantly from that described in the paper, in that it attempts to
   63  * introduce not just notions of affinity for connections and distribute work
   64  * so as to reduce lock contention, but also align those notions with
   65  * hardware work distribution strategies such as RSS.  In this construction,
   66  * connection groups supplement, rather than replace, existing reservation
   67  * tables for protocol 4-tuples, offering CPU-affine lookup tables with
   68  * minimal cache line migration and lock contention during steady state
   69  * operation.
   70  *
   71  * Hardware-offloaded checksums are often inefficient in software -- for
   72  * example, Toeplitz, specified by RSS, introduced a significant overhead if
   73  * performed during per-packet processing.  It is therefore desirable to fall
   74  * back on traditional reservation table lookups without affinity where
   75  * hardware-offloaded checksums aren't available, such as for traffic over
   76  * non-RSS interfaces.
   77  *
   78  * Internet protocols, such as UDP and TCP, register to use connection groups
   79  * by providing an ipi_hashfields value other than IPI_HASHFIELDS_NONE; this
   80  * indicates to the connection group code whether a 2-tuple or 4-tuple is
   81  * used as an argument to hashes that assign a connection to a particular
   82  * group.  This must be aligned with any hardware offloaded distribution
   83  * model, such as RSS or similar approaches taken in embedded network boards.
   84  * Wildcard sockets require special handling, as in Willman 2006, and are
   85  * shared between connection groups -- while being protected by group-local
   86  * locks.  This means that connection establishment and teardown can be
   87  * significantly more expensive than without connection groups, but that
   88  * steady-state processing can be significantly faster.
   89  *
   90  * When RSS is used, certain connection group parameters, such as the number
   91  * of groups, are provided by the RSS implementation, found in in_rss.c.
   92  * Otherwise, in_pcbgroup.c selects possible sensible parameters
   93  * corresponding to the degree of parallelism exposed by netisr.
   94  *
   95  * Most of the implementation of connection groups is in this file; however,
   96  * connection group lookup is implemented in in_pcb.c alongside reservation
   97  * table lookups -- see in_pcblookup_group().
   98  *
   99  * TODO:
  100  *
  101  * Implement dynamic rebalancing of buckets with connection groups; when
  102  * load is unevenly distributed, search for more optimal balancing on
  103  * demand.  This might require scaling up the number of connection groups
  104  * by <<1.
  105  *
  106  * Provide an IP 2-tuple or 4-tuple netisr m2cpu handler based on connection
  107  * groups for ip_input and ip6_input, allowing non-offloaded work
  108  * distribution.
  109  *
  110  * Expose effective CPU affinity of connections to userspace using socket
  111  * options.
  112  *
  113  * Investigate per-connection affinity overrides based on socket options; an
  114  * option could be set, certainly resulting in work being distributed
  115  * differently in software, and possibly propagated to supporting hardware
  116  * with TCAMs or hardware hash tables.  This might require connections to
  117  * exist in more than one connection group at a time.
  118  *
  119  * Hook netisr thread reconfiguration events, and propagate those to RSS so
  120  * that rebalancing can occur when the thread pool grows or shrinks.
  121  *
  122  * Expose per-pcbgroup statistics to userspace monitoring tools such as
  123  * netstat, in order to allow better debugging and profiling.
  124  */
  125 
  126 void
  127 in_pcbgroup_init(struct inpcbinfo *pcbinfo, u_int hashfields,
  128     int hash_nelements)
  129 {
  130         struct inpcbgroup *pcbgroup;
  131         u_int numpcbgroups, pgn;
  132 
  133         /*
  134          * Only enable connection groups for a protocol if it has been
  135          * specifically requested.
  136          */
  137         if (hashfields == IPI_HASHFIELDS_NONE)
  138                 return;
  139 
  140         /*
  141          * Connection groups are about multi-processor load distribution,
  142          * lock contention, and connection CPU affinity.  As such, no point
  143          * in turning them on for a uniprocessor machine, it only wastes
  144          * memory.
  145          */
  146         if (mp_ncpus == 1)
  147                 return;
  148 
  149 #ifdef RSS
  150         /*
  151          * If we're using RSS, then RSS determines the number of connection
  152          * groups to use: one connection group per RSS bucket.  If for some
  153          * reason RSS isn't able to provide a number of buckets, disable
  154          * connection groups entirely.
  155          *
  156          * XXXRW: Can this ever happen?
  157          */
  158         numpcbgroups = rss_getnumbuckets();
  159         if (numpcbgroups == 0)
  160                 return;
  161 #else
  162         /*
  163          * Otherwise, we'll just use one per CPU for now.  If we decide to
  164          * do dynamic rebalancing a la RSS, we'll need similar logic here.
  165          */
  166         numpcbgroups = mp_ncpus;
  167 #endif
  168 
  169         pcbinfo->ipi_hashfields = hashfields;
  170         pcbinfo->ipi_pcbgroups = malloc(numpcbgroups *
  171             sizeof(*pcbinfo->ipi_pcbgroups), M_PCB, M_WAITOK | M_ZERO);
  172         pcbinfo->ipi_npcbgroups = numpcbgroups;
  173         pcbinfo->ipi_wildbase = hashinit(hash_nelements, M_PCB,
  174             &pcbinfo->ipi_wildmask);
  175         for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) {
  176                 pcbgroup = &pcbinfo->ipi_pcbgroups[pgn];
  177                 pcbgroup->ipg_hashbase = hashinit(hash_nelements, M_PCB,
  178                     &pcbgroup->ipg_hashmask);
  179                 INP_GROUP_LOCK_INIT(pcbgroup, "pcbgroup");
  180 
  181                 /*
  182                  * Initialise notional affinity of the pcbgroup -- for RSS,
  183                  * we want the same notion of affinity as NICs to be used.  In
  184                  * the non-RSS case, just round robin for the time being.
  185                  *
  186                  * XXXRW: The notion of a bucket to CPU mapping is common at
  187                  * both pcbgroup and RSS layers -- does that mean that we
  188                  * should migrate it all from RSS to here, and just leave RSS
  189                  * responsible only for providing hashing and mapping funtions?
  190                  */
  191 #ifdef RSS
  192                 pcbgroup->ipg_cpu = rss_getcpu(pgn);
  193 #else
  194                 pcbgroup->ipg_cpu = (pgn % mp_ncpus);
  195 #endif
  196         }
  197 }
  198 
  199 void
  200 in_pcbgroup_destroy(struct inpcbinfo *pcbinfo)
  201 {
  202         struct inpcbgroup *pcbgroup;
  203         u_int pgn;
  204 
  205         if (pcbinfo->ipi_npcbgroups == 0)
  206                 return;
  207 
  208         for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) {
  209                 pcbgroup = &pcbinfo->ipi_pcbgroups[pgn];
  210                 KASSERT(CK_LIST_EMPTY(pcbinfo->ipi_listhead),
  211                     ("in_pcbinfo_destroy: listhead not empty"));
  212                 INP_GROUP_LOCK_DESTROY(pcbgroup);
  213                 hashdestroy(pcbgroup->ipg_hashbase, M_PCB,
  214                     pcbgroup->ipg_hashmask);
  215         }
  216         hashdestroy(pcbinfo->ipi_wildbase, M_PCB, pcbinfo->ipi_wildmask);
  217         free(pcbinfo->ipi_pcbgroups, M_PCB);
  218         pcbinfo->ipi_pcbgroups = NULL;
  219         pcbinfo->ipi_npcbgroups = 0;
  220         pcbinfo->ipi_hashfields = 0;
  221 }
  222 
  223 /*
  224  * Given a hash of whatever the covered tuple might be, return a pcbgroup
  225  * index.  Where RSS is supported, try to align bucket selection with RSS CPU
  226  * affinity strategy.
  227  */
  228 static __inline u_int
  229 in_pcbgroup_getbucket(struct inpcbinfo *pcbinfo, uint32_t hash)
  230 {
  231 
  232 #ifdef RSS
  233         return (rss_getbucket(hash));
  234 #else
  235         return (hash % pcbinfo->ipi_npcbgroups);
  236 #endif
  237 }
  238 
  239 /*
  240  * Map a (hashtype, hash) tuple into a connection group, or NULL if the hash
  241  * information is insufficient to identify the pcbgroup.  This might occur if
  242  * a TCP packet turns up with a 2-tuple hash, or if an RSS hash is present but
  243  * RSS is not compiled into the kernel.
  244  */
  245 struct inpcbgroup *
  246 in_pcbgroup_byhash(struct inpcbinfo *pcbinfo, u_int hashtype, uint32_t hash)
  247 {
  248 
  249 #ifdef RSS
  250         if ((pcbinfo->ipi_hashfields == IPI_HASHFIELDS_4TUPLE &&
  251             hashtype == M_HASHTYPE_RSS_TCP_IPV4) ||
  252             (pcbinfo->ipi_hashfields == IPI_HASHFIELDS_4TUPLE &&
  253             hashtype == M_HASHTYPE_RSS_UDP_IPV4) ||
  254             (pcbinfo->ipi_hashfields == IPI_HASHFIELDS_2TUPLE &&
  255             hashtype == M_HASHTYPE_RSS_IPV4))
  256                 return (&pcbinfo->ipi_pcbgroups[
  257                     in_pcbgroup_getbucket(pcbinfo, hash)]);
  258 #endif
  259         return (NULL);
  260 }
  261 
  262 static struct inpcbgroup *
  263 in_pcbgroup_bymbuf(struct inpcbinfo *pcbinfo, struct mbuf *m)
  264 {
  265 
  266         return (in_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m),
  267             m->m_pkthdr.flowid));
  268 }
  269 
  270 struct inpcbgroup *
  271 in_pcbgroup_bytuple(struct inpcbinfo *pcbinfo, struct in_addr laddr,
  272     u_short lport, struct in_addr faddr, u_short fport)
  273 {
  274         uint32_t hash;
  275 
  276         /*
  277          * RSS note: we pass foreign addr/port as source, and local addr/port
  278          * as destination, as we want to align with what the hardware is
  279          * doing.
  280          */
  281         switch (pcbinfo->ipi_hashfields) {
  282         case IPI_HASHFIELDS_4TUPLE:
  283 #ifdef RSS
  284                 hash = rss_hash_ip4_4tuple(faddr, fport, laddr, lport);
  285 #else
  286                 hash = faddr.s_addr ^ fport;
  287 #endif
  288                 break;
  289 
  290         case IPI_HASHFIELDS_2TUPLE:
  291 #ifdef RSS
  292                 hash = rss_hash_ip4_2tuple(faddr, laddr);
  293 #else
  294                 hash = faddr.s_addr ^ laddr.s_addr;
  295 #endif
  296                 break;
  297 
  298         default:
  299                 hash = 0;
  300         }
  301         return (&pcbinfo->ipi_pcbgroups[in_pcbgroup_getbucket(pcbinfo,
  302             hash)]);
  303 }
  304 
  305 struct inpcbgroup *
  306 in_pcbgroup_byinpcb(struct inpcb *inp)
  307 {
  308 #ifdef  RSS
  309         /*
  310          * Listen sockets with INP_RSS_BUCKET_SET set have a pre-determined
  311          * RSS bucket and thus we should use this pcbgroup, rather than
  312          * using a tuple or hash.
  313          *
  314          * XXX should verify that there's actually pcbgroups and inp_rss_listen_bucket
  315          * fits in that!
  316          */
  317         if (inp->inp_flags2 & INP_RSS_BUCKET_SET)
  318                 return (&inp->inp_pcbinfo->ipi_pcbgroups[inp->inp_rss_listen_bucket]);
  319 #endif
  320 
  321         return (in_pcbgroup_bytuple(inp->inp_pcbinfo, inp->inp_laddr,
  322             inp->inp_lport, inp->inp_faddr, inp->inp_fport));
  323 }
  324 
  325 static void
  326 in_pcbwild_add(struct inpcb *inp)
  327 {
  328         struct inpcbinfo *pcbinfo;
  329         struct inpcbhead *head;
  330         u_int pgn;
  331 
  332         INP_WLOCK_ASSERT(inp);
  333         KASSERT(!(inp->inp_flags2 & INP_PCBGROUPWILD),
  334             ("%s: is wild",__func__));
  335 
  336         pcbinfo = inp->inp_pcbinfo;
  337         for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
  338                 INP_GROUP_LOCK(&pcbinfo->ipi_pcbgroups[pgn]);
  339         head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, inp->inp_lport,
  340             0, pcbinfo->ipi_wildmask)];
  341         CK_LIST_INSERT_HEAD(head, inp, inp_pcbgroup_wild);
  342         inp->inp_flags2 |= INP_PCBGROUPWILD;
  343         for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
  344                 INP_GROUP_UNLOCK(&pcbinfo->ipi_pcbgroups[pgn]);
  345 }
  346 
  347 static void
  348 in_pcbwild_remove(struct inpcb *inp)
  349 {
  350         struct inpcbinfo *pcbinfo;
  351         u_int pgn;
  352 
  353         INP_WLOCK_ASSERT(inp);
  354         KASSERT((inp->inp_flags2 & INP_PCBGROUPWILD),
  355             ("%s: not wild", __func__));
  356 
  357         pcbinfo = inp->inp_pcbinfo;
  358         for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
  359                 INP_GROUP_LOCK(&pcbinfo->ipi_pcbgroups[pgn]);
  360         CK_LIST_REMOVE(inp, inp_pcbgroup_wild);
  361         for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
  362                 INP_GROUP_UNLOCK(&pcbinfo->ipi_pcbgroups[pgn]);
  363         inp->inp_flags2 &= ~INP_PCBGROUPWILD;
  364 }
  365 
  366 static __inline int
  367 in_pcbwild_needed(struct inpcb *inp)
  368 {
  369 #ifdef  RSS
  370         /*
  371          * If it's a listen socket and INP_RSS_BUCKET_SET is set,
  372          * it's a wildcard socket _but_ it's in a specific pcbgroup.
  373          * Thus we don't treat it as a pcbwild inp.
  374          */
  375         if (inp->inp_flags2 & INP_RSS_BUCKET_SET)
  376                 return (0);
  377 #endif
  378 
  379 #ifdef INET6
  380         if (inp->inp_vflag & INP_IPV6)
  381                 return (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr));
  382         else
  383 #endif
  384                 return (inp->inp_faddr.s_addr == htonl(INADDR_ANY));
  385 }
  386 
  387 static void
  388 in_pcbwild_update_internal(struct inpcb *inp)
  389 {
  390         int wildcard_needed;
  391 
  392         wildcard_needed = in_pcbwild_needed(inp);
  393         if (wildcard_needed && !(inp->inp_flags2 & INP_PCBGROUPWILD))
  394                 in_pcbwild_add(inp);
  395         else if (!wildcard_needed && (inp->inp_flags2 & INP_PCBGROUPWILD))
  396                 in_pcbwild_remove(inp);
  397 }
  398 
  399 /*
  400  * Update the pcbgroup of an inpcb, which might include removing an old
  401  * pcbgroup reference and/or adding a new one.  Wildcard processing is not
  402  * performed here, although ideally we'll never install a pcbgroup for a
  403  * wildcard inpcb (asserted below).
  404  */
  405 static void
  406 in_pcbgroup_update_internal(struct inpcbinfo *pcbinfo,
  407     struct inpcbgroup *newpcbgroup, struct inpcb *inp)
  408 {
  409         struct inpcbgroup *oldpcbgroup;
  410         struct inpcbhead *pcbhash;
  411         uint32_t hashkey_faddr;
  412 
  413         INP_WLOCK_ASSERT(inp);
  414 
  415         oldpcbgroup = inp->inp_pcbgroup;
  416         if (oldpcbgroup != NULL && oldpcbgroup != newpcbgroup) {
  417                 INP_GROUP_LOCK(oldpcbgroup);
  418                 CK_LIST_REMOVE(inp, inp_pcbgrouphash);
  419                 inp->inp_pcbgroup = NULL;
  420                 INP_GROUP_UNLOCK(oldpcbgroup);
  421         }
  422         if (newpcbgroup != NULL && oldpcbgroup != newpcbgroup) {
  423 #ifdef INET6
  424                 if (inp->inp_vflag & INP_IPV6)
  425                         hashkey_faddr = INP6_PCBHASHKEY(&inp->in6p_faddr);
  426                 else
  427 #endif
  428                         hashkey_faddr = inp->inp_faddr.s_addr;
  429                 INP_GROUP_LOCK(newpcbgroup);
  430                 /*
  431                  * If the inp is an RSS bucket wildcard entry, ensure
  432                  * that the PCB hash is calculated correctly.
  433                  *
  434                  * The wildcard hash calculation differs from the
  435                  * non-wildcard definition.  The source address is
  436                  * INADDR_ANY and the far port is 0.
  437                  */
  438                 if (inp->inp_flags2 & INP_RSS_BUCKET_SET) {
  439                         pcbhash = &newpcbgroup->ipg_hashbase[
  440                             INP_PCBHASH(INADDR_ANY, inp->inp_lport, 0,
  441                             newpcbgroup->ipg_hashmask)];
  442                 } else {
  443                         pcbhash = &newpcbgroup->ipg_hashbase[
  444                             INP_PCBHASH(hashkey_faddr, inp->inp_lport,
  445                             inp->inp_fport,
  446                             newpcbgroup->ipg_hashmask)];
  447                 }
  448                 CK_LIST_INSERT_HEAD(pcbhash, inp, inp_pcbgrouphash);
  449                 inp->inp_pcbgroup = newpcbgroup;
  450                 INP_GROUP_UNLOCK(newpcbgroup);
  451         }
  452 
  453         KASSERT(!(newpcbgroup != NULL && in_pcbwild_needed(inp)),
  454             ("%s: pcbgroup and wildcard!", __func__));
  455 }
  456 
  457 /*
  458  * Two update paths: one in which the 4-tuple on an inpcb has been updated
  459  * and therefore connection groups may need to change (or a wildcard entry
  460  * may needed to be installed), and another in which the 4-tuple has been
  461  * set as a result of a packet received, in which case we may be able to use
  462  * the hash on the mbuf to avoid doing a software hash calculation for RSS.
  463  *
  464  * In each case: first, let the wildcard code have a go at placing it as a
  465  * wildcard socket.  If it was a wildcard, or if the connection has been
  466  * dropped, then no pcbgroup is required (so potentially clear it);
  467  * otherwise, calculate and update the pcbgroup for the inpcb.
  468  */
  469 void
  470 in_pcbgroup_update(struct inpcb *inp)
  471 {
  472         struct inpcbinfo *pcbinfo;
  473         struct inpcbgroup *newpcbgroup;
  474 
  475         INP_WLOCK_ASSERT(inp);
  476 
  477         pcbinfo = inp->inp_pcbinfo;
  478         if (!in_pcbgroup_enabled(pcbinfo))
  479                 return;
  480 
  481         in_pcbwild_update_internal(inp);
  482         if (!(inp->inp_flags2 & INP_PCBGROUPWILD) &&
  483             !(inp->inp_flags & INP_DROPPED)) {
  484 #ifdef INET6
  485                 if (inp->inp_vflag & INP_IPV6)
  486                         newpcbgroup = in6_pcbgroup_byinpcb(inp);
  487                 else
  488 #endif
  489                         newpcbgroup = in_pcbgroup_byinpcb(inp);
  490         } else
  491                 newpcbgroup = NULL;
  492         in_pcbgroup_update_internal(pcbinfo, newpcbgroup, inp);
  493 }
  494 
  495 void
  496 in_pcbgroup_update_mbuf(struct inpcb *inp, struct mbuf *m)
  497 {
  498         struct inpcbinfo *pcbinfo;
  499         struct inpcbgroup *newpcbgroup;
  500 
  501         INP_WLOCK_ASSERT(inp);
  502 
  503         pcbinfo = inp->inp_pcbinfo;
  504         if (!in_pcbgroup_enabled(pcbinfo))
  505                 return;
  506 
  507         /*
  508          * Possibly should assert !INP_PCBGROUPWILD rather than testing for
  509          * it; presumably this function should never be called for anything
  510          * other than non-wildcard socket?
  511          */
  512         in_pcbwild_update_internal(inp);
  513         if (!(inp->inp_flags2 & INP_PCBGROUPWILD) &&
  514             !(inp->inp_flags & INP_DROPPED)) {
  515                 newpcbgroup = in_pcbgroup_bymbuf(pcbinfo, m);
  516 #ifdef INET6
  517                 if (inp->inp_vflag & INP_IPV6) {
  518                         if (newpcbgroup == NULL)
  519                                 newpcbgroup = in6_pcbgroup_byinpcb(inp);
  520                 } else {
  521 #endif
  522                         if (newpcbgroup == NULL)
  523                                 newpcbgroup = in_pcbgroup_byinpcb(inp);
  524 #ifdef INET6
  525                 }
  526 #endif
  527         } else
  528                 newpcbgroup = NULL;
  529         in_pcbgroup_update_internal(pcbinfo, newpcbgroup, inp);
  530 }
  531 
  532 /*
  533  * Remove pcbgroup entry and optional pcbgroup wildcard entry for this inpcb.
  534  */
  535 void
  536 in_pcbgroup_remove(struct inpcb *inp)
  537 {
  538         struct inpcbgroup *pcbgroup;
  539 
  540         INP_WLOCK_ASSERT(inp);
  541 
  542         if (!in_pcbgroup_enabled(inp->inp_pcbinfo))
  543                 return;
  544 
  545         if (inp->inp_flags2 & INP_PCBGROUPWILD)
  546                 in_pcbwild_remove(inp);
  547 
  548         pcbgroup = inp->inp_pcbgroup;
  549         if (pcbgroup != NULL) {
  550                 INP_GROUP_LOCK(pcbgroup);
  551                 CK_LIST_REMOVE(inp, inp_pcbgrouphash);
  552                 inp->inp_pcbgroup = NULL;
  553                 INP_GROUP_UNLOCK(pcbgroup);
  554         }
  555 }
  556 
  557 /*
  558  * Query whether or not it is appropriate to use pcbgroups to look up inpcbs
  559  * for a protocol.
  560  */
  561 int
  562 in_pcbgroup_enabled(struct inpcbinfo *pcbinfo)
  563 {
  564 
  565         return (pcbinfo->ipi_npcbgroups > 0);
  566 }

Cache object: 4e6705052c6fa4eb588c20baa7320083


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.