The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/in_pcbgroup.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2010-2011 Juniper Networks, Inc.
    3  * All rights reserved.
    4  *
    5  * This software was developed by Robert N. M. Watson under contract
    6  * to Juniper Networks, Inc.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  */
   29 
   30 #include <sys/cdefs.h>
   31 
   32 __FBSDID("$FreeBSD: releng/11.1/sys/netinet/in_pcbgroup.c 297439 2016-03-31 00:53:23Z gnn $");
   33 
   34 #include "opt_inet6.h"
   35 #include "opt_rss.h"
   36 
   37 #include <sys/param.h>
   38 #include <sys/lock.h>
   39 #include <sys/malloc.h>
   40 #include <sys/mbuf.h>
   41 #include <sys/mutex.h>
   42 #include <sys/smp.h>
   43 #include <sys/socket.h>
   44 #include <sys/socketvar.h>
   45 
   46 #include <net/rss_config.h>
   47 
   48 #include <netinet/in.h>
   49 
   50 #include <netinet/in_pcb.h>
   51 #include <netinet/in_rss.h>
   52 #ifdef INET6
   53 #include <netinet6/in6_pcb.h>
   54 #endif /* INET6 */
   55 
   56 /*
   57  * pcbgroups, or "connection groups" are based on Willman, Rixner, and Cox's
   58  * 2006 USENIX paper, "An Evaluation of Network Stack Parallelization
   59  * Strategies in Modern Operating Systems".  This implementation differs
   60  * significantly from that described in the paper, in that it attempts to
   61  * introduce not just notions of affinity for connections and distribute work
   62  * so as to reduce lock contention, but also align those notions with
   63  * hardware work distribution strategies such as RSS.  In this construction,
   64  * connection groups supplement, rather than replace, existing reservation
   65  * tables for protocol 4-tuples, offering CPU-affine lookup tables with
   66  * minimal cache line migration and lock contention during steady state
   67  * operation.
   68  *
   69  * Hardware-offloaded checksums are often inefficient in software -- for
   70  * example, Toeplitz, specified by RSS, introduces a significant overhead if
   71  * performed during per-packet processing.  It is therefore desirable to fall
   72  * back on traditional reservation table lookups without affinity where
   73  * hardware-offloaded checksums aren't available, such as for traffic over
   74  * non-RSS interfaces.
   75  *
   76  * Internet protocols, such as UDP and TCP, register to use connection groups
   77  * by providing an ipi_hashfields value other than IPI_HASHFIELDS_NONE; this
   78  * indicates to the connection group code whether a 2-tuple or 4-tuple is
   79  * used as an argument to hashes that assign a connection to a particular
   80  * group.  This must be aligned with any hardware offloaded distribution
   81  * model, such as RSS or similar approaches taken in embedded network boards.
   82  * Wildcard sockets require special handling, as in Willman 2006, and are
   83  * shared between connection groups -- while being protected by group-local
   84  * locks.  This means that connection establishment and teardown can be
   85  * significantly more expensive than without connection groups, but that
   86  * steady-state processing can be significantly faster.
   87  *
   88  * When RSS is used, certain connection group parameters, such as the number
   89  * of groups, are provided by the RSS implementation, found in in_rss.c.
   90  * Otherwise, in_pcbgroup.c selects possible sensible parameters
   91  * corresponding to the degree of parallelism exposed by netisr.
   92  *
   93  * Most of the implementation of connection groups is in this file; however,
   94  * connection group lookup is implemented in in_pcb.c alongside reservation
   95  * table lookups -- see in_pcblookup_group().
   96  *
   97  * TODO:
   98  *
   99  * Implement dynamic rebalancing of buckets with connection groups; when
  100  * load is unevenly distributed, search for more optimal balancing on
  101  * demand.  This might require scaling up the number of connection groups
  102  * by <<1.
  103  *
  104  * Provide an IP 2-tuple or 4-tuple netisr m2cpu handler based on connection
  105  * groups for ip_input and ip6_input, allowing non-offloaded work
  106  * distribution.
  107  *
  108  * Expose effective CPU affinity of connections to userspace using socket
  109  * options.
  110  *
  111  * Investigate per-connection affinity overrides based on socket options; an
  112  * option could be set, certainly resulting in work being distributed
  113  * differently in software, and possibly propagated to supporting hardware
  114  * with TCAMs or hardware hash tables.  This might require connections to
  115  * exist in more than one connection group at a time.
  116  *
  117  * Hook netisr thread reconfiguration events, and propagate those to RSS so
  118  * that rebalancing can occur when the thread pool grows or shrinks.
  119  *
  120  * Expose per-pcbgroup statistics to userspace monitoring tools such as
  121  * netstat, in order to allow better debugging and profiling.
  122  */
  123 
  124 void
  125 in_pcbgroup_init(struct inpcbinfo *pcbinfo, u_int hashfields,
  126     int hash_nelements)
  127 {
  128         struct inpcbgroup *pcbgroup;
  129         u_int numpcbgroups, pgn;
  130 
  131         /*
  132          * Only enable connection groups for a protocol if it has been
  133          * specifically requested.
  134          */
  135         if (hashfields == IPI_HASHFIELDS_NONE)
  136                 return;
  137 
  138         /*
  139          * Connection groups are about multi-processor load distribution,
  140          * lock contention, and connection CPU affinity.  As such, no point
  141          * in turning them on for a uniprocessor machine, it only wastes
  142          * memory.
  143          */
  144         if (mp_ncpus == 1)
  145                 return;
  146 
  147 #ifdef RSS
  148         /*
  149          * If we're using RSS, then RSS determines the number of connection
  150          * groups to use: one connection group per RSS bucket.  If for some
  151          * reason RSS isn't able to provide a number of buckets, disable
  152          * connection groups entirely.
  153          *
  154          * XXXRW: Can this ever happen?
  155          */
  156         numpcbgroups = rss_getnumbuckets();
  157         if (numpcbgroups == 0)
  158                 return;
  159 #else
  160         /*
  161          * Otherwise, we'll just use one per CPU for now.  If we decide to
  162          * do dynamic rebalancing a la RSS, we'll need similar logic here.
  163          */
  164         numpcbgroups = mp_ncpus;
  165 #endif
  166 
  167         pcbinfo->ipi_hashfields = hashfields;
  168         pcbinfo->ipi_pcbgroups = malloc(numpcbgroups *
  169             sizeof(*pcbinfo->ipi_pcbgroups), M_PCB, M_WAITOK | M_ZERO);
  170         pcbinfo->ipi_npcbgroups = numpcbgroups;
  171         pcbinfo->ipi_wildbase = hashinit(hash_nelements, M_PCB,
  172             &pcbinfo->ipi_wildmask);
  173         for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) {
  174                 pcbgroup = &pcbinfo->ipi_pcbgroups[pgn];
  175                 pcbgroup->ipg_hashbase = hashinit(hash_nelements, M_PCB,
  176                     &pcbgroup->ipg_hashmask);
  177                 INP_GROUP_LOCK_INIT(pcbgroup, "pcbgroup");
  178 
  179                 /*
  180                  * Initialise notional affinity of the pcbgroup -- for RSS,
  181                  * we want the same notion of affinity as NICs to be used.  In
  182                  * the non-RSS case, just round robin for the time being.
  183                  *
  184                  * XXXRW: The notion of a bucket to CPU mapping is common at
  185                  * both pcbgroup and RSS layers -- does that mean that we
  186                  * should migrate it all from RSS to here, and just leave RSS
  187                  * responsible only for providing hashing and mapping funtions?
  188                  */
  189 #ifdef RSS
  190                 pcbgroup->ipg_cpu = rss_getcpu(pgn);
  191 #else
  192                 pcbgroup->ipg_cpu = (pgn % mp_ncpus);
  193 #endif
  194         }
  195 }
  196 
  197 void
  198 in_pcbgroup_destroy(struct inpcbinfo *pcbinfo)
  199 {
  200         struct inpcbgroup *pcbgroup;
  201         u_int pgn;
  202 
  203         if (pcbinfo->ipi_npcbgroups == 0)
  204                 return;
  205 
  206         for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) {
  207                 pcbgroup = &pcbinfo->ipi_pcbgroups[pgn];
  208                 KASSERT(LIST_EMPTY(pcbinfo->ipi_listhead),
  209                     ("in_pcbinfo_destroy: listhead not empty"));
  210                 INP_GROUP_LOCK_DESTROY(pcbgroup);
  211                 hashdestroy(pcbgroup->ipg_hashbase, M_PCB,
  212                     pcbgroup->ipg_hashmask);
  213         }
  214         hashdestroy(pcbinfo->ipi_wildbase, M_PCB, pcbinfo->ipi_wildmask);
  215         free(pcbinfo->ipi_pcbgroups, M_PCB);
  216         pcbinfo->ipi_pcbgroups = NULL;
  217         pcbinfo->ipi_npcbgroups = 0;
  218         pcbinfo->ipi_hashfields = 0;
  219 }
  220 
  221 /*
  222  * Given a hash of whatever the covered tuple might be, return a pcbgroup
  223  * index.  Where RSS is supported, try to align bucket selection with RSS CPU
  224  * affinity strategy.
  225  */
  226 static __inline u_int
  227 in_pcbgroup_getbucket(struct inpcbinfo *pcbinfo, uint32_t hash)
  228 {
  229 
  230 #ifdef RSS
  231         return (rss_getbucket(hash));
  232 #else
  233         return (hash % pcbinfo->ipi_npcbgroups);
  234 #endif
  235 }
  236 
  237 /*
  238  * Map a (hashtype, hash) tuple into a connection group, or NULL if the hash
  239  * information is insufficient to identify the pcbgroup.  This might occur if
  240  * a TCP packet turns up with a 2-tuple hash, or if an RSS hash is present but
  241  * RSS is not compiled into the kernel.
  242  */
  243 struct inpcbgroup *
  244 in_pcbgroup_byhash(struct inpcbinfo *pcbinfo, u_int hashtype, uint32_t hash)
  245 {
  246 
  247 #ifdef RSS
  248         if ((pcbinfo->ipi_hashfields == IPI_HASHFIELDS_4TUPLE &&
  249             hashtype == M_HASHTYPE_RSS_TCP_IPV4) ||
  250             (pcbinfo->ipi_hashfields == IPI_HASHFIELDS_4TUPLE &&
  251             hashtype == M_HASHTYPE_RSS_UDP_IPV4) ||
  252             (pcbinfo->ipi_hashfields == IPI_HASHFIELDS_2TUPLE &&
  253             hashtype == M_HASHTYPE_RSS_IPV4))
  254                 return (&pcbinfo->ipi_pcbgroups[
  255                     in_pcbgroup_getbucket(pcbinfo, hash)]);
  256 #endif
  257         return (NULL);
  258 }
  259 
  260 static struct inpcbgroup *
  261 in_pcbgroup_bymbuf(struct inpcbinfo *pcbinfo, struct mbuf *m)
  262 {
  263 
  264         return (in_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m),
  265             m->m_pkthdr.flowid));
  266 }
  267 
  268 struct inpcbgroup *
  269 in_pcbgroup_bytuple(struct inpcbinfo *pcbinfo, struct in_addr laddr,
  270     u_short lport, struct in_addr faddr, u_short fport)
  271 {
  272         uint32_t hash;
  273 
  274         /*
  275          * RSS note: we pass foreign addr/port as source, and local addr/port
  276          * as destination, as we want to align with what the hardware is
  277          * doing.
  278          */
  279         switch (pcbinfo->ipi_hashfields) {
  280         case IPI_HASHFIELDS_4TUPLE:
  281 #ifdef RSS
  282                 hash = rss_hash_ip4_4tuple(faddr, fport, laddr, lport);
  283 #else
  284                 hash = faddr.s_addr ^ fport;
  285 #endif
  286                 break;
  287 
  288         case IPI_HASHFIELDS_2TUPLE:
  289 #ifdef RSS
  290                 hash = rss_hash_ip4_2tuple(faddr, laddr);
  291 #else
  292                 hash = faddr.s_addr ^ laddr.s_addr;
  293 #endif
  294                 break;
  295 
  296         default:
  297                 hash = 0;
  298         }
  299         return (&pcbinfo->ipi_pcbgroups[in_pcbgroup_getbucket(pcbinfo,
  300             hash)]);
  301 }
  302 
  303 struct inpcbgroup *
  304 in_pcbgroup_byinpcb(struct inpcb *inp)
  305 {
  306 #ifdef  RSS
  307         /*
  308          * Listen sockets with INP_RSS_BUCKET_SET set have a pre-determined
  309          * RSS bucket and thus we should use this pcbgroup, rather than
  310          * using a tuple or hash.
  311          *
  312          * XXX should verify that there's actually pcbgroups and inp_rss_listen_bucket
  313          * fits in that!
  314          */
  315         if (inp->inp_flags2 & INP_RSS_BUCKET_SET)
  316                 return (&inp->inp_pcbinfo->ipi_pcbgroups[inp->inp_rss_listen_bucket]);
  317 #endif
  318 
  319         return (in_pcbgroup_bytuple(inp->inp_pcbinfo, inp->inp_laddr,
  320             inp->inp_lport, inp->inp_faddr, inp->inp_fport));
  321 }
  322 
  323 static void
  324 in_pcbwild_add(struct inpcb *inp)
  325 {
  326         struct inpcbinfo *pcbinfo;
  327         struct inpcbhead *head;
  328         u_int pgn;
  329 
  330         INP_WLOCK_ASSERT(inp);
  331         KASSERT(!(inp->inp_flags2 & INP_PCBGROUPWILD),
  332             ("%s: is wild",__func__));
  333 
  334         pcbinfo = inp->inp_pcbinfo;
  335         for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
  336                 INP_GROUP_LOCK(&pcbinfo->ipi_pcbgroups[pgn]);
  337         head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, inp->inp_lport,
  338             0, pcbinfo->ipi_wildmask)];
  339         LIST_INSERT_HEAD(head, inp, inp_pcbgroup_wild);
  340         inp->inp_flags2 |= INP_PCBGROUPWILD;
  341         for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
  342                 INP_GROUP_UNLOCK(&pcbinfo->ipi_pcbgroups[pgn]);
  343 }
  344 
  345 static void
  346 in_pcbwild_remove(struct inpcb *inp)
  347 {
  348         struct inpcbinfo *pcbinfo;
  349         u_int pgn;
  350 
  351         INP_WLOCK_ASSERT(inp);
  352         KASSERT((inp->inp_flags2 & INP_PCBGROUPWILD),
  353             ("%s: not wild", __func__));
  354 
  355         pcbinfo = inp->inp_pcbinfo;
  356         for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
  357                 INP_GROUP_LOCK(&pcbinfo->ipi_pcbgroups[pgn]);
  358         LIST_REMOVE(inp, inp_pcbgroup_wild);
  359         for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
  360                 INP_GROUP_UNLOCK(&pcbinfo->ipi_pcbgroups[pgn]);
  361         inp->inp_flags2 &= ~INP_PCBGROUPWILD;
  362 }
  363 
  364 static __inline int
  365 in_pcbwild_needed(struct inpcb *inp)
  366 {
  367 #ifdef  RSS
  368         /*
  369          * If it's a listen socket and INP_RSS_BUCKET_SET is set,
  370          * it's a wildcard socket _but_ it's in a specific pcbgroup.
  371          * Thus we don't treat it as a pcbwild inp.
  372          */
  373         if (inp->inp_flags2 & INP_RSS_BUCKET_SET)
  374                 return (0);
  375 #endif
  376 
  377 #ifdef INET6
  378         if (inp->inp_vflag & INP_IPV6)
  379                 return (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr));
  380         else
  381 #endif
  382                 return (inp->inp_faddr.s_addr == htonl(INADDR_ANY));
  383 }
  384 
  385 static void
  386 in_pcbwild_update_internal(struct inpcb *inp)
  387 {
  388         int wildcard_needed;
  389 
  390         wildcard_needed = in_pcbwild_needed(inp);
  391         if (wildcard_needed && !(inp->inp_flags2 & INP_PCBGROUPWILD))
  392                 in_pcbwild_add(inp);
  393         else if (!wildcard_needed && (inp->inp_flags2 & INP_PCBGROUPWILD))
  394                 in_pcbwild_remove(inp);
  395 }
  396 
  397 /*
  398  * Update the pcbgroup of an inpcb, which might include removing an old
  399  * pcbgroup reference and/or adding a new one.  Wildcard processing is not
  400  * performed here, although ideally we'll never install a pcbgroup for a
  401  * wildcard inpcb (asserted below).
  402  */
  403 static void
  404 in_pcbgroup_update_internal(struct inpcbinfo *pcbinfo,
  405     struct inpcbgroup *newpcbgroup, struct inpcb *inp)
  406 {
  407         struct inpcbgroup *oldpcbgroup;
  408         struct inpcbhead *pcbhash;
  409         uint32_t hashkey_faddr;
  410 
  411         INP_WLOCK_ASSERT(inp);
  412 
  413         oldpcbgroup = inp->inp_pcbgroup;
  414         if (oldpcbgroup != NULL && oldpcbgroup != newpcbgroup) {
  415                 INP_GROUP_LOCK(oldpcbgroup);
  416                 LIST_REMOVE(inp, inp_pcbgrouphash);
  417                 inp->inp_pcbgroup = NULL;
  418                 INP_GROUP_UNLOCK(oldpcbgroup);
  419         }
  420         if (newpcbgroup != NULL && oldpcbgroup != newpcbgroup) {
  421 #ifdef INET6
  422                 if (inp->inp_vflag & INP_IPV6)
  423                         hashkey_faddr = INP6_PCBHASHKEY(&inp->in6p_faddr);
  424                 else
  425 #endif
  426                         hashkey_faddr = inp->inp_faddr.s_addr;
  427                 INP_GROUP_LOCK(newpcbgroup);
  428                 /*
  429                  * If the inp is an RSS bucket wildcard entry, ensure
  430                  * that the PCB hash is calculated correctly.
  431                  *
  432                  * The wildcard hash calculation differs from the
  433                  * non-wildcard definition.  The source address is
  434                  * INADDR_ANY and the far port is 0.
  435                  */
  436                 if (inp->inp_flags2 & INP_RSS_BUCKET_SET) {
  437                         pcbhash = &newpcbgroup->ipg_hashbase[
  438                             INP_PCBHASH(INADDR_ANY, inp->inp_lport, 0,
  439                             newpcbgroup->ipg_hashmask)];
  440                 } else {
  441                         pcbhash = &newpcbgroup->ipg_hashbase[
  442                             INP_PCBHASH(hashkey_faddr, inp->inp_lport,
  443                             inp->inp_fport,
  444                             newpcbgroup->ipg_hashmask)];
  445                 }
  446                 LIST_INSERT_HEAD(pcbhash, inp, inp_pcbgrouphash);
  447                 inp->inp_pcbgroup = newpcbgroup;
  448                 INP_GROUP_UNLOCK(newpcbgroup);
  449         }
  450 
  451         KASSERT(!(newpcbgroup != NULL && in_pcbwild_needed(inp)),
  452             ("%s: pcbgroup and wildcard!", __func__));
  453 }
  454 
  455 /*
  456  * Two update paths: one in which the 4-tuple on an inpcb has been updated
  457  * and therefore connection groups may need to change (or a wildcard entry
  458  * may needed to be installed), and another in which the 4-tuple has been
  459  * set as a result of a packet received, in which case we may be able to use
  460  * the hash on the mbuf to avoid doing a software hash calculation for RSS.
  461  *
  462  * In each case: first, let the wildcard code have a go at placing it as a
  463  * wildcard socket.  If it was a wildcard, or if the connection has been
  464  * dropped, then no pcbgroup is required (so potentially clear it);
  465  * otherwise, calculate and update the pcbgroup for the inpcb.
  466  */
  467 void
  468 in_pcbgroup_update(struct inpcb *inp)
  469 {
  470         struct inpcbinfo *pcbinfo;
  471         struct inpcbgroup *newpcbgroup;
  472 
  473         INP_WLOCK_ASSERT(inp);
  474 
  475         pcbinfo = inp->inp_pcbinfo;
  476         if (!in_pcbgroup_enabled(pcbinfo))
  477                 return;
  478 
  479         in_pcbwild_update_internal(inp);
  480         if (!(inp->inp_flags2 & INP_PCBGROUPWILD) &&
  481             !(inp->inp_flags & INP_DROPPED)) {
  482 #ifdef INET6
  483                 if (inp->inp_vflag & INP_IPV6)
  484                         newpcbgroup = in6_pcbgroup_byinpcb(inp);
  485                 else
  486 #endif
  487                         newpcbgroup = in_pcbgroup_byinpcb(inp);
  488         } else
  489                 newpcbgroup = NULL;
  490         in_pcbgroup_update_internal(pcbinfo, newpcbgroup, inp);
  491 }
  492 
  493 void
  494 in_pcbgroup_update_mbuf(struct inpcb *inp, struct mbuf *m)
  495 {
  496         struct inpcbinfo *pcbinfo;
  497         struct inpcbgroup *newpcbgroup;
  498 
  499         INP_WLOCK_ASSERT(inp);
  500 
  501         pcbinfo = inp->inp_pcbinfo;
  502         if (!in_pcbgroup_enabled(pcbinfo))
  503                 return;
  504 
  505         /*
  506          * Possibly should assert !INP_PCBGROUPWILD rather than testing for
  507          * it; presumably this function should never be called for anything
  508          * other than non-wildcard socket?
  509          */
  510         in_pcbwild_update_internal(inp);
  511         if (!(inp->inp_flags2 & INP_PCBGROUPWILD) &&
  512             !(inp->inp_flags & INP_DROPPED)) {
  513                 newpcbgroup = in_pcbgroup_bymbuf(pcbinfo, m);
  514 #ifdef INET6
  515                 if (inp->inp_vflag & INP_IPV6) {
  516                         if (newpcbgroup == NULL)
  517                                 newpcbgroup = in6_pcbgroup_byinpcb(inp);
  518                 } else {
  519 #endif
  520                         if (newpcbgroup == NULL)
  521                                 newpcbgroup = in_pcbgroup_byinpcb(inp);
  522 #ifdef INET6
  523                 }
  524 #endif
  525         } else
  526                 newpcbgroup = NULL;
  527         in_pcbgroup_update_internal(pcbinfo, newpcbgroup, inp);
  528 }
  529 
  530 /*
  531  * Remove pcbgroup entry and optional pcbgroup wildcard entry for this inpcb.
  532  */
  533 void
  534 in_pcbgroup_remove(struct inpcb *inp)
  535 {
  536         struct inpcbgroup *pcbgroup;
  537 
  538         INP_WLOCK_ASSERT(inp);
  539 
  540         if (!in_pcbgroup_enabled(inp->inp_pcbinfo))
  541                 return;
  542 
  543         if (inp->inp_flags2 & INP_PCBGROUPWILD)
  544                 in_pcbwild_remove(inp);
  545 
  546         pcbgroup = inp->inp_pcbgroup;
  547         if (pcbgroup != NULL) {
  548                 INP_GROUP_LOCK(pcbgroup);
  549                 LIST_REMOVE(inp, inp_pcbgrouphash);
  550                 inp->inp_pcbgroup = NULL;
  551                 INP_GROUP_UNLOCK(pcbgroup);
  552         }
  553 }
  554 
  555 /*
  556  * Query whether or not it is appropriate to use pcbgroups to look up inpcbs
  557  * for a protocol.
  558  */
  559 int
  560 in_pcbgroup_enabled(struct inpcbinfo *pcbinfo)
  561 {
  562 
  563         return (pcbinfo->ipi_npcbgroups > 0);
  564 }

Cache object: b1862d7e56e801299159bf8d20a2959f


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.