The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/net/route/nhop.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2020 Alexander V. Chernikov
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   25  * SUCH DAMAGE.
   26  */
   27 
   28 #include <sys/cdefs.h>
   29 __FBSDID("$FreeBSD$");
   30 #include "opt_inet.h"
   31 #include "opt_route.h"
   32 
   33 #include <sys/param.h>
   34 #include <sys/systm.h>
   35 #include <sys/lock.h>
   36 #include <sys/rwlock.h>
   37 #include <sys/malloc.h>
   38 #include <sys/mbuf.h>
   39 #include <sys/socket.h>
   40 #include <sys/kernel.h>
   41 
   42 #include <net/if.h>
   43 #include <net/if_var.h>
   44 #include <net/route.h>
   45 #include <net/route/route_var.h>
   46 #include <net/route/nhop_utils.h>
   47 #include <net/route/nhop.h>
   48 #include <net/route/nhop_var.h>
   49 #include <net/vnet.h>
   50 
   51 #define DEBUG_MOD_NAME  nhop
   52 #define DEBUG_MAX_LEVEL LOG_DEBUG
   53 #include <net/route/route_debug.h>
   54 _DECLARE_DEBUG(LOG_INFO);
   55 
   56 /*
   57  * This file contains data structures management logic for the nexthop ("nhop")
   58  *   route subsystem.
   59  *
   60  * Nexthops in the original sense are the objects containing all the necessary
   61  * information to forward the packet to the selected destination.
   62  * In particular, nexthop is defined by a combination of
   63  *  ifp, ifa, aifp, mtu, gw addr(if set), nh_type, nh_family, mask of rt_flags and
   64  *    NHF_DEFAULT
   65  *
   66  * All nexthops are stored in the resizable hash table.
   67  * Additionally, each nexthop gets assigned its unique index (nexthop index)
   68  * so userland programs can interact with the nexthops easier. Index allocation
   69  * is backed by the bitmask array.
   70  */
   71 
   72 MALLOC_DEFINE(M_NHOP, "nhops", "nexthops data");
   73 
   74 /* Hash management functions */
   75 
   76 int
   77 nhops_init_rib(struct rib_head *rh)
   78 {
   79         struct nh_control *ctl;
   80         size_t alloc_size;
   81         uint32_t num_buckets, num_items;
   82         void *ptr;
   83 
   84         ctl = malloc(sizeof(struct nh_control), M_NHOP, M_WAITOK | M_ZERO);
   85 
   86         /*
   87          * Allocate nexthop hash. Start with 16 items by default (128 bytes).
   88          * This will be enough for most of the cases.
   89          */
   90         num_buckets = 16;
   91         alloc_size = CHT_SLIST_GET_RESIZE_SIZE(num_buckets);
   92         ptr = malloc(alloc_size, M_NHOP, M_WAITOK | M_ZERO);
   93         CHT_SLIST_INIT(&ctl->nh_head, ptr, num_buckets);
   94 
   95         /*
   96          * Allocate nexthop index bitmask.
   97          */
   98         num_items = 128 * 8; /* 128 bytes */
   99         ptr = malloc(bitmask_get_size(num_items), M_NHOP, M_WAITOK | M_ZERO);
  100         bitmask_init(&ctl->nh_idx_head, ptr, num_items);
  101 
  102         NHOPS_LOCK_INIT(ctl);
  103 
  104         rh->nh_control = ctl;
  105         ctl->ctl_rh = rh;
  106 
  107         FIB_CTL_LOG(LOG_DEBUG2, ctl, "nhops init: ctl %p rh %p", ctl, rh);
  108 
  109         return (0);
  110 }
  111 
  112 static void
  113 destroy_ctl(struct nh_control *ctl)
  114 {
  115 
  116         NHOPS_LOCK_DESTROY(ctl);
  117         free(ctl->nh_head.ptr, M_NHOP);
  118         free(ctl->nh_idx_head.idx, M_NHOP);
  119 #ifdef ROUTE_MPATH
  120         nhgrp_ctl_free(ctl);
  121 #endif
  122         free(ctl, M_NHOP);
  123 }
  124 
  125 /*
  126  * Epoch callback indicating ctl is safe to destroy
  127  */
  128 static void
  129 destroy_ctl_epoch(epoch_context_t ctx)
  130 {
  131         struct nh_control *ctl;
  132 
  133         ctl = __containerof(ctx, struct nh_control, ctl_epoch_ctx);
  134 
  135         destroy_ctl(ctl);
  136 }
  137 
  138 void
  139 nhops_destroy_rib(struct rib_head *rh)
  140 {
  141         struct nh_control *ctl;
  142         struct nhop_priv *nh_priv;
  143 
  144         ctl = rh->nh_control;
  145 
  146         /*
  147          * All routes should have been deleted in rt_table_destroy().
  148          * However, TCP stack or other consumers may store referenced
  149          *  nexthop pointers. When these references go to zero,
  150          *  nhop_free() will try to unlink these records from the
  151          *  datastructures, most likely leading to panic.
  152          *
  153          * Avoid that by explicitly marking all of the remaining
  154          *  nexthops as unlinked by removing a reference from a special
  155          *  counter. Please see nhop_free() comments for more
  156          *  details.
  157          */
  158 
  159         NHOPS_WLOCK(ctl);
  160         CHT_SLIST_FOREACH(&ctl->nh_head, nhops, nh_priv) {
  161                 FIB_RH_LOG(LOG_DEBUG3, rh, "marking nhop %u unlinked", nh_priv->nh_idx);
  162                 refcount_release(&nh_priv->nh_linked);
  163         } CHT_SLIST_FOREACH_END;
  164 #ifdef ROUTE_MPATH
  165         nhgrp_ctl_unlink_all(ctl);
  166 #endif
  167         NHOPS_WUNLOCK(ctl);
  168 
  169         /*
  170          * Postpone destruction till the end of current epoch
  171          * so nhop_free() can safely use nh_control pointer.
  172          */
  173         NET_EPOCH_CALL(destroy_ctl_epoch, &ctl->ctl_epoch_ctx);
  174 }
  175 
  176 /*
  177  * Nexhop hash calculation:
  178  *
  179  * Nexthops distribution:
  180  * 2 "mandatory" nexthops per interface ("interface route", "loopback").
  181  * For direct peering: 1 nexthop for the peering router per ifp/af.
  182  * For Ix-like peering: tens to hundreds nexthops of neghbors per ifp/af.
  183  * IGP control plane & broadcast segment: tens of nexthops per ifp/af.
  184  *
  185  * Each fib/af combination has its own hash table.
  186  * With that in mind, hash nexthops by the combination of the interface
  187  *  and GW IP address.
  188  *
  189  * To optimize hash calculation, ignore lower bits of ifnet pointer,
  190  * as they  give very little entropy.
  191  * Similarly, use lower 4 bytes of IPv6 address to distinguish between the
  192  *  neighbors.
  193  */
  194 struct _hash_data {
  195         uint16_t        ifentropy;
  196         uint8_t         family;
  197         uint8_t         nh_type;
  198         uint32_t        gw_addr;
  199 };
  200 
  201 static unsigned
  202 djb_hash(const unsigned char *h, const int len)
  203 {
  204         unsigned int result = 0;
  205         int i;
  206 
  207         for (i = 0; i < len; i++)
  208                 result = 33 * result ^ h[i];
  209 
  210         return (result);
  211 }
  212 
  213 static uint32_t
  214 hash_priv(const struct nhop_priv *priv)
  215 {
  216         struct nhop_object *nh = priv->nh;
  217         struct _hash_data key = {
  218             .ifentropy = (uint16_t)((((uintptr_t)nh->nh_ifp) >> 6) & 0xFFFF),
  219             .family = nh->gw_sa.sa_family,
  220             .nh_type = priv->nh_type & 0xFF,
  221             .gw_addr = (nh->gw_sa.sa_family == AF_INET6) ?
  222                 nh->gw6_sa.sin6_addr.s6_addr32[3] :
  223                 nh->gw4_sa.sin_addr.s_addr
  224         };
  225 
  226         return (uint32_t)(djb_hash((const unsigned char *)&key, sizeof(key)));
  227 }
  228 
  229 /*
  230  * Checks if hash needs resizing and performs this resize if necessary
  231  *
  232  */
  233 static void
  234 consider_resize(struct nh_control *ctl, uint32_t new_nh_buckets, uint32_t new_idx_items)
  235 {
  236         void *nh_ptr, *nh_idx_ptr;
  237         void *old_idx_ptr;
  238         size_t alloc_size;
  239 
  240         nh_ptr = NULL;
  241         if (new_nh_buckets != 0) {
  242                 alloc_size = CHT_SLIST_GET_RESIZE_SIZE(new_nh_buckets);
  243                 nh_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO);
  244         }
  245 
  246         nh_idx_ptr = NULL;
  247         if (new_idx_items != 0) {
  248                 alloc_size = bitmask_get_size(new_idx_items);
  249                 nh_idx_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO);
  250         }
  251 
  252         if (nh_ptr == NULL && nh_idx_ptr == NULL) {
  253                 /* Either resize is not required or allocations have failed. */
  254                 return;
  255         }
  256 
  257         FIB_CTL_LOG(LOG_DEBUG, ctl,
  258             "going to resize: nh:[ptr:%p sz:%u] idx:[ptr:%p sz:%u]",
  259             nh_ptr, new_nh_buckets, nh_idx_ptr, new_idx_items);
  260 
  261         old_idx_ptr = NULL;
  262 
  263         NHOPS_WLOCK(ctl);
  264         if (nh_ptr != NULL) {
  265                 CHT_SLIST_RESIZE(&ctl->nh_head, nhops, nh_ptr, new_nh_buckets);
  266         }
  267         if (nh_idx_ptr != NULL) {
  268                 if (bitmask_copy(&ctl->nh_idx_head, nh_idx_ptr, new_idx_items) == 0)
  269                         bitmask_swap(&ctl->nh_idx_head, nh_idx_ptr, new_idx_items, &old_idx_ptr);
  270         }
  271         NHOPS_WUNLOCK(ctl);
  272 
  273         if (nh_ptr != NULL)
  274                 free(nh_ptr, M_NHOP);
  275         if (old_idx_ptr != NULL)
  276                 free(old_idx_ptr, M_NHOP);
  277 }
  278 
  279 /*
  280  * Links nextop @nh_priv to the nexhop hash table and allocates
  281  *  nexhop index.
  282  * Returns allocated index or 0 on failure.
  283  */
  284 int
  285 link_nhop(struct nh_control *ctl, struct nhop_priv *nh_priv)
  286 {
  287         uint16_t idx;
  288         uint32_t num_buckets_new, num_items_new;
  289 
  290         KASSERT((nh_priv->nh_idx == 0), ("nhop index is already allocated"));
  291         NHOPS_WLOCK(ctl);
  292 
  293         /*
  294          * Check if we need to resize hash and index.
  295          * The following 2 functions returns either new size or 0
  296          *  if resize is not required.
  297          */
  298         num_buckets_new = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->nh_head);
  299         num_items_new = bitmask_get_resize_items(&ctl->nh_idx_head);
  300 
  301         if (bitmask_alloc_idx(&ctl->nh_idx_head, &idx) != 0) {
  302                 NHOPS_WUNLOCK(ctl);
  303                 FIB_CTL_LOG(LOG_INFO, ctl, "Unable to allocate nhop index");
  304                 RTSTAT_INC(rts_nh_idx_alloc_failure);
  305                 consider_resize(ctl, num_buckets_new, num_items_new);
  306                 return (0);
  307         }
  308 
  309         nh_priv->nh_idx = idx;
  310         nh_priv->nh_control = ctl;
  311         nh_priv->nh_finalized = 1;
  312 
  313         CHT_SLIST_INSERT_HEAD(&ctl->nh_head, nhops, nh_priv);
  314 
  315         NHOPS_WUNLOCK(ctl);
  316 
  317         FIB_RH_LOG(LOG_DEBUG2, ctl->ctl_rh,
  318             "Linked nhop priv %p to %d, hash %u, ctl %p",
  319             nh_priv, idx, hash_priv(nh_priv), ctl);
  320         consider_resize(ctl, num_buckets_new, num_items_new);
  321 
  322         return (idx);
  323 }
  324 
  325 /*
  326  * Unlinks nexthop specified by @nh_priv data from the hash.
  327  *
  328  * Returns found nexthop or NULL.
  329  */
  330 struct nhop_priv *
  331 unlink_nhop(struct nh_control *ctl, struct nhop_priv *nh_priv_del)
  332 {
  333         struct nhop_priv *priv_ret;
  334         int idx;
  335         uint32_t num_buckets_new, num_items_new;
  336 
  337         idx = 0;
  338 
  339         NHOPS_WLOCK(ctl);
  340         CHT_SLIST_REMOVE(&ctl->nh_head, nhops, nh_priv_del, priv_ret);
  341 
  342         if (priv_ret != NULL) {
  343                 idx = priv_ret->nh_idx;
  344                 priv_ret->nh_idx = 0;
  345 
  346                 KASSERT((idx != 0), ("bogus nhop index 0"));
  347                 if ((bitmask_free_idx(&ctl->nh_idx_head, idx)) != 0) {
  348                         FIB_CTL_LOG(LOG_DEBUG, ctl,
  349                             "Unable to remove index %d from fib %u af %d",
  350                             idx, ctl->ctl_rh->rib_fibnum, ctl->ctl_rh->rib_family);
  351                 }
  352         }
  353 
  354         /* Check if hash or index needs to be resized */
  355         num_buckets_new = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->nh_head);
  356         num_items_new = bitmask_get_resize_items(&ctl->nh_idx_head);
  357 
  358         NHOPS_WUNLOCK(ctl);
  359 
  360         if (priv_ret == NULL) {
  361                 FIB_CTL_LOG(LOG_INFO, ctl,
  362                     "Unable to unlink nhop priv %p from hash, hash %u ctl %p",
  363                     nh_priv_del, hash_priv(nh_priv_del), ctl);
  364         } else {
  365                 FIB_CTL_LOG(LOG_DEBUG2, ctl, "Unlinked nhop %p priv idx %d",
  366                     priv_ret, idx);
  367         }
  368 
  369         consider_resize(ctl, num_buckets_new, num_items_new);
  370 
  371         return (priv_ret);
  372 }
  373 
  374 /*
  375  * Searches for the nexthop by data specifcied in @nh_priv.
  376  * Returns referenced nexthop or NULL.
  377  */
  378 struct nhop_priv *
  379 find_nhop(struct nh_control *ctl, const struct nhop_priv *nh_priv)
  380 {
  381         struct nhop_priv *nh_priv_ret;
  382 
  383         NHOPS_RLOCK(ctl);
  384         CHT_SLIST_FIND_BYOBJ(&ctl->nh_head, nhops, nh_priv, nh_priv_ret);
  385         if (nh_priv_ret != NULL) {
  386                 if (refcount_acquire_if_not_zero(&nh_priv_ret->nh_refcnt) == 0){
  387                         /* refcount was 0 -> nhop is being deleted */
  388                         nh_priv_ret = NULL;
  389                 }
  390         }
  391         NHOPS_RUNLOCK(ctl);
  392 
  393         return (nh_priv_ret);
  394 }

Cache object: 0f2a7fba943bd227b7566e6af82d6d84


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.