The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/net/route/nhgrp_ctl.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2020 Alexander V. Chernikov
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   25  * SUCH DAMAGE.
   26  *
   27  * $FreeBSD$
   28  */
   29 #include "opt_inet.h"
   30 #include "opt_route.h"
   31 
   32 #include <sys/cdefs.h>
   33 #include <sys/param.h>
   34 #include <sys/systm.h>
   35 #include <sys/lock.h>
   36 #include <sys/rmlock.h>
   37 #include <sys/malloc.h>
   38 #include <sys/mbuf.h>
   39 #include <sys/refcount.h>
   40 #include <sys/socket.h>
   41 #include <sys/sysctl.h>
   42 #include <sys/kernel.h>
   43 #include <sys/epoch.h>
   44 
   45 #include <net/if.h>
   46 #include <net/if_var.h>
   47 #include <net/if_private.h>
   48 #include <net/route.h>
   49 #include <net/route/route_ctl.h>
   50 #include <net/route/route_var.h>
   51 #include <net/vnet.h>
   52 
   53 #include <netinet/in.h>
   54 #include <netinet/in_var.h>
   55 #include <netinet/in_fib.h>
   56 
   57 #include <net/route/nhop_utils.h>
   58 #include <net/route/nhop.h>
   59 #include <net/route/nhop_var.h>
   60 #include <net/route/nhgrp_var.h>
   61 
   62 #define DEBUG_MOD_NAME  nhgrp_ctl
   63 #define DEBUG_MAX_LEVEL LOG_DEBUG
   64 #include <net/route/route_debug.h>
   65 _DECLARE_DEBUG(LOG_INFO);
   66 
   67 /*
   68  * This file contains the supporting functions for creating multipath groups
   69  *  and compiling their dataplane parts.
   70  */
   71 
   72 /* MPF_MULTIPATH must be the same as NHF_MULTIPATH for nhop selection to work */
   73 _Static_assert(MPF_MULTIPATH == NHF_MULTIPATH,
   74     "MPF_MULTIPATH must be the same as NHF_MULTIPATH");
   75 /* Offset and size of flags field has to be the same for nhop/nhop groups */
   76 CHK_STRUCT_FIELD_GENERIC(struct nhop_object, nh_flags, struct nhgrp_object, nhg_flags);
   77 /* Cap multipath to 64, as the larger values would break rib_cmd_info bmasks */
   78 CTASSERT(RIB_MAX_MPATH_WIDTH <= 64);
   79 
   80 static int wn_cmp_idx(const void *a, const void *b);
   81 static void sort_weightened_nhops(struct weightened_nhop *wn, int num_nhops);
   82 
   83 static struct nhgrp_priv *get_nhgrp(struct nh_control *ctl,
   84     struct weightened_nhop *wn, int num_nhops, uint32_t uidx, int *perror);
   85 static void destroy_nhgrp(struct nhgrp_priv *nhg_priv);
   86 static void destroy_nhgrp_epoch(epoch_context_t ctx);
   87 static void free_nhgrp_nhops(struct nhgrp_priv *nhg_priv);
   88 
   89 static int
   90 wn_cmp_idx(const void *a, const void *b)
   91 {
   92         const struct weightened_nhop *w_a = a;
   93         const struct weightened_nhop *w_b = b;
   94         uint32_t a_idx = w_a->nh->nh_priv->nh_idx;
   95         uint32_t b_idx = w_b->nh->nh_priv->nh_idx;
   96 
   97         if (a_idx < b_idx)
   98                 return (-1);
   99         else if (a_idx > b_idx)
  100                 return (1);
  101         else
  102                 return (0);
  103 }
  104 
  105 /*
  106  * Perform in-place sorting for array of nexthops in @wn.
  107  * Sort by nexthop index ascending.
  108  */
  109 static void
  110 sort_weightened_nhops(struct weightened_nhop *wn, int num_nhops)
  111 {
  112 
  113         qsort(wn, num_nhops, sizeof(struct weightened_nhop), wn_cmp_idx);
  114 }
  115 
  116 /*
  117  * In order to determine the minimum weight difference in the array
  118  * of weights, create a sorted array of weights, using spare "storage"
  119  * field in the `struct weightened_nhop`.
  120  * Assume weights to be (mostly) the same and use insertion sort to
  121  * make it sorted.
  122  */
  123 static void
  124 sort_weightened_nhops_weights(struct weightened_nhop *wn, int num_items)
  125 {
  126         wn[0].storage = wn[0].weight;
  127         for (int i = 1, j = 0; i < num_items; i++) {
  128                 uint32_t weight = wn[i].weight; // read from 'weight' as it's not reordered
  129                 /* Move all weights > weight 1 position right */
  130                 for (j = i - 1; j >= 0 && wn[j].storage > weight; j--)
  131                         wn[j + 1].storage = wn[j].storage;
  132                 wn[j + 1].storage = weight;
  133         }
  134 }
  135 
  136 /*
  137  * Calculate minimum number of slots required to fit the existing
  138  * set of weights in the common use case where weights are "easily"
  139  * comparable.
  140  * Assumes @wn is sorted by weight ascending and each weight is > 0.
  141  * Returns number of slots or 0 if precise calculation failed.
  142  *
  143  * Some examples:
  144  * note: (i, X) pair means (nhop=i, weight=X):
  145  * (1, 1) (2, 2) -> 3 slots [1, 2, 2]
  146  * (1, 100), (2, 200) -> 3 slots [1, 2, 2]
  147  * (1, 100), (2, 200), (3, 400) -> 7 slots [1, 2, 2, 3, 3, 3]
  148  */
  149 static uint32_t
  150 calc_min_mpath_slots_fast(struct weightened_nhop *wn, size_t num_items,
  151     uint64_t *ptotal)
  152 {
  153         uint32_t i, last, xmin;
  154         uint64_t total = 0;
  155 
  156         // Get sorted array of weights in .storage field
  157         sort_weightened_nhops_weights(wn, num_items);
  158 
  159         last = 0;
  160         xmin = wn[0].storage;
  161         for (i = 0; i < num_items; i++) {
  162                 total += wn[i].storage;
  163                 if ((wn[i].storage != last) &&
  164                     ((wn[i].storage - last < xmin) || xmin == 0)) {
  165                         xmin = wn[i].storage - last;
  166                 }
  167                 last = wn[i].storage;
  168         }
  169         *ptotal = total;
  170         /* xmin is the minimum unit of desired capacity */
  171         if ((total % xmin) != 0)
  172                 return (0);
  173         for (i = 0; i < num_items; i++) {
  174                 if ((wn[i].weight % xmin) != 0)
  175                         return (0);
  176         }
  177 
  178         return ((uint32_t)(total / xmin));
  179 }
  180 
  181 /*
  182  * Calculate minimum number of slots required to fit the existing
  183  * set of weights while maintaining weight coefficients.
  184  *
  185  * Assume @wn is sorted by weight ascending and each weight is > 0.
  186  *
  187  * Tries to find simple precise solution first and falls back to
  188  *  RIB_MAX_MPATH_WIDTH in case of any failure.
  189  */
  190 static uint32_t
  191 calc_min_mpath_slots(struct weightened_nhop *wn, size_t num_items)
  192 {
  193         uint32_t v;
  194         uint64_t total;
  195 
  196         v = calc_min_mpath_slots_fast(wn, num_items, &total);
  197         if (total == 0)
  198                 return (0);
  199         if ((v == 0) || (v > RIB_MAX_MPATH_WIDTH))
  200                 v = RIB_MAX_MPATH_WIDTH;
  201 
  202         return (v);
  203 }
  204 
  205 /*
  206  * Nexthop group data consists of
  207  * 1) dataplane part, with nhgrp_object as a header followed by an
  208  *   arbitrary number of nexthop pointers.
  209  * 2) control plane part, with nhgrp_priv as a header, followed by
  210  *   an arbirtrary number of 'struct weightened_nhop' object.
  211  *
  212  * Given nexthop groups are (mostly) immutable, allocate all data
  213  * in one go.
  214  *
  215  */
  216 __noinline static size_t
  217 get_nhgrp_alloc_size(uint32_t nhg_size, uint32_t num_nhops)
  218 {
  219         size_t sz;
  220 
  221         sz = sizeof(struct nhgrp_object);
  222         sz += nhg_size * sizeof(struct nhop_object *);
  223         sz += sizeof(struct nhgrp_priv);
  224         sz += num_nhops * sizeof(struct weightened_nhop);
  225         return (sz);
  226 }
  227 
  228 /*
  229  * Compile actual list of nexthops to be used by datapath from
  230  *  the nexthop group @dst.
  231  *
  232  * For example, compiling control plane list of 2 nexthops
  233  *  [(200, A), (100, B)] would result in the datapath array
  234  *  [A, A, B]
  235  */
  236 static void
  237 compile_nhgrp(struct nhgrp_priv *dst_priv, const struct weightened_nhop *x,
  238     uint32_t num_slots)
  239 {
  240         struct nhgrp_object *dst;
  241         int i, slot_idx, remaining_slots;
  242         uint64_t remaining_sum, nh_weight, nh_slots;
  243 
  244         slot_idx  = 0;
  245         dst = dst_priv->nhg;
  246         /* Calculate sum of all weights */
  247         remaining_sum = 0;
  248         for (i = 0; i < dst_priv->nhg_nh_count; i++)
  249                 remaining_sum += x[i].weight;
  250         remaining_slots = num_slots;
  251         FIB_NH_LOG(LOG_DEBUG3, x[0].nh, "sum: %lu, slots: %d",
  252             remaining_sum, remaining_slots);
  253         for (i = 0; i < dst_priv->nhg_nh_count; i++) {
  254                 /* Calculate number of slots for the current nexthop */
  255                 if (remaining_sum > 0) {
  256                         nh_weight = (uint64_t)x[i].weight;
  257                         nh_slots = (nh_weight * remaining_slots / remaining_sum);
  258                 } else
  259                         nh_slots = 0;
  260 
  261                 remaining_sum -= x[i].weight;
  262                 remaining_slots -= nh_slots;
  263 
  264                 FIB_NH_LOG(LOG_DEBUG3, x[0].nh,
  265                     " rem_sum: %lu, rem_slots: %d nh_slots: %d, slot_idx: %d",
  266                     remaining_sum, remaining_slots, (int)nh_slots, slot_idx);
  267 
  268                 KASSERT((slot_idx + nh_slots <= num_slots),
  269                     ("index overflow during nhg compilation"));
  270                 while (nh_slots-- > 0)
  271                         dst->nhops[slot_idx++] = x[i].nh;
  272         }
  273 }
  274 
  275 /*
  276  * Allocates new nexthop group for the list of weightened nexthops.
  277  * Assume sorted list.
  278  * Does NOT reference any nexthops in the group.
  279  * Returns group with refcount=1 or NULL.
  280  */
  281 static struct nhgrp_priv *
  282 alloc_nhgrp(struct weightened_nhop *wn, int num_nhops)
  283 {
  284         uint32_t nhgrp_size;
  285         struct nhgrp_object *nhg;
  286         struct nhgrp_priv *nhg_priv;
  287 
  288         nhgrp_size = calc_min_mpath_slots(wn, num_nhops);
  289         if (nhgrp_size == 0) {
  290                 /* Zero weights, abort */
  291                 return (NULL);
  292         }
  293 
  294         size_t sz = get_nhgrp_alloc_size(nhgrp_size, num_nhops);
  295         nhg = malloc(sz, M_NHOP, M_NOWAIT | M_ZERO);
  296         if (nhg == NULL) {
  297                 FIB_NH_LOG(LOG_INFO, wn[0].nh,
  298                     "unable to allocate group with num_nhops %d (compiled %u)",
  299                     num_nhops, nhgrp_size);
  300                 return (NULL);
  301         }
  302 
  303         /* Has to be the first to make NHGRP_PRIV() work */
  304         nhg->nhg_size = nhgrp_size;
  305         nhg->nhg_flags = MPF_MULTIPATH;
  306 
  307         nhg_priv = NHGRP_PRIV(nhg);
  308         nhg_priv->nhg_nh_count = num_nhops;
  309         refcount_init(&nhg_priv->nhg_refcount, 1);
  310 
  311         /* Please see nhgrp_free() comments on the initial value */
  312         refcount_init(&nhg_priv->nhg_linked, 2);
  313 
  314         nhg_priv->nhg = nhg;
  315         memcpy(&nhg_priv->nhg_nh_weights[0], wn,
  316           num_nhops * sizeof(struct weightened_nhop));
  317 
  318         FIB_NH_LOG(LOG_DEBUG, wn[0].nh, "num_nhops: %d, compiled_nhop: %u",
  319             num_nhops, nhgrp_size);
  320 
  321         compile_nhgrp(nhg_priv, wn, nhg->nhg_size);
  322 
  323         return (nhg_priv);
  324 }
  325 
  326 void
  327 nhgrp_ref_object(struct nhgrp_object *nhg)
  328 {
  329         struct nhgrp_priv *nhg_priv;
  330         u_int old __diagused;
  331 
  332         nhg_priv = NHGRP_PRIV(nhg);
  333         old = refcount_acquire(&nhg_priv->nhg_refcount);
  334         KASSERT(old > 0, ("%s: nhgrp object %p has 0 refs", __func__, nhg));
  335 }
  336 
  337 void
  338 nhgrp_free(struct nhgrp_object *nhg)
  339 {
  340         struct nhgrp_priv *nhg_priv;
  341         struct nh_control *ctl;
  342         struct epoch_tracker et;
  343 
  344         nhg_priv = NHGRP_PRIV(nhg);
  345 
  346         if (!refcount_release(&nhg_priv->nhg_refcount))
  347                 return;
  348 
  349         /*
  350          * group objects don't have an explicit lock attached to it.
  351          * As groups are reclaimed based on reference count, it is possible
  352          * that some groups will persist after vnet destruction callback
  353          * called. Given that, handle scenario with nhgrp_free_group() being
  354          * called either after or simultaneously with nhgrp_ctl_unlink_all()
  355          * by using another reference counter: nhg_linked.
  356          *
  357          * There are only 2 places, where nhg_linked can be decreased:
  358          *  rib destroy (nhgrp_ctl_unlink_all) and this function.
  359          * nhg_link can never be increased.
  360          *
  361          * Hence, use initial value of 2 to make use of
  362          *  refcount_release_if_not_last().
  363          *
  364          * There can be two scenarious when calling this function:
  365          *
  366          * 1) nhg_linked value is 2. This means that either
  367          *  nhgrp_ctl_unlink_all() has not been called OR it is running,
  368          *  but we are guaranteed that nh_control won't be freed in
  369          *  this epoch. Hence, nexthop can be safely unlinked.
  370          *
  371          * 2) nh_linked value is 1. In that case, nhgrp_ctl_unlink_all()
  372          *  has been called and nhgrp unlink can be skipped.
  373          */
  374 
  375         NET_EPOCH_ENTER(et);
  376         if (refcount_release_if_not_last(&nhg_priv->nhg_linked)) {
  377                 ctl = nhg_priv->nh_control;
  378                 if (unlink_nhgrp(ctl, nhg_priv) == NULL) {
  379                         /* Do not try to reclaim */
  380                         RT_LOG(LOG_INFO, "Failed to unlink nexhop group %p",
  381                             nhg_priv);
  382                         NET_EPOCH_EXIT(et);
  383                         return;
  384                 }
  385         }
  386         NET_EPOCH_EXIT(et);
  387 
  388         KASSERT((nhg_priv->nhg_idx == 0), ("gr_idx != 0"));
  389         NET_EPOCH_CALL(destroy_nhgrp_epoch, &nhg_priv->nhg_epoch_ctx);
  390 }
  391 
  392 /*
  393  * Destroys all local resources belonging to @nhg_priv.
  394  */
  395 __noinline static void
  396 destroy_nhgrp_int(struct nhgrp_priv *nhg_priv)
  397 {
  398 
  399         free(nhg_priv->nhg, M_NHOP);
  400 }
  401 
  402 __noinline static void
  403 destroy_nhgrp(struct nhgrp_priv *nhg_priv)
  404 {
  405 
  406         KASSERT((nhg_priv->nhg_refcount == 0), ("nhg_refcount != 0"));
  407         KASSERT((nhg_priv->nhg_idx == 0), ("gr_idx != 0"));
  408 
  409         IF_DEBUG_LEVEL(LOG_DEBUG2) {
  410                 char nhgbuf[NHOP_PRINT_BUFSIZE] __unused;
  411                 FIB_NH_LOG(LOG_DEBUG2, nhg_priv->nhg_nh_weights[0].nh,
  412                     "destroying %s", nhgrp_print_buf(nhg_priv->nhg,
  413                     nhgbuf, sizeof(nhgbuf)));
  414         }
  415 
  416         free_nhgrp_nhops(nhg_priv);
  417         destroy_nhgrp_int(nhg_priv);
  418 }
  419 
  420 /*
  421  * Epoch callback indicating group is safe to destroy
  422  */
  423 static void
  424 destroy_nhgrp_epoch(epoch_context_t ctx)
  425 {
  426         struct nhgrp_priv *nhg_priv;
  427 
  428         nhg_priv = __containerof(ctx, struct nhgrp_priv, nhg_epoch_ctx);
  429 
  430         destroy_nhgrp(nhg_priv);
  431 }
  432 
  433 static bool
  434 ref_nhgrp_nhops(struct nhgrp_priv *nhg_priv)
  435 {
  436 
  437         for (int i = 0; i < nhg_priv->nhg_nh_count; i++) {
  438                 if (nhop_try_ref_object(nhg_priv->nhg_nh_weights[i].nh) != 0)
  439                         continue;
  440 
  441                 /*
  442                  * Failed to ref the nexthop, b/c it's deleted.
  443                  * Need to rollback references back.
  444                  */
  445                 for (int j = 0; j < i; j++)
  446                         nhop_free(nhg_priv->nhg_nh_weights[j].nh);
  447                 return (false);
  448         }
  449 
  450         return (true);
  451 }
  452 
  453 static void
  454 free_nhgrp_nhops(struct nhgrp_priv *nhg_priv)
  455 {
  456 
  457         for (int i = 0; i < nhg_priv->nhg_nh_count; i++)
  458                 nhop_free(nhg_priv->nhg_nh_weights[i].nh);
  459 }
  460 
  461 /*
  462  * Allocate nexthop group of size @num_nhops with nexthops specified by
  463  * @wn. Nexthops have to be unique and match the fibnum/family of the group.
  464  * Returns unlinked nhgrp object on success or NULL and non-zero perror.
  465  */
  466 struct nhgrp_object *
  467 nhgrp_alloc(uint32_t fibnum, int family, struct weightened_nhop *wn, int num_nhops,
  468     int *perror)
  469 {
  470         struct rib_head *rh = rt_tables_get_rnh(fibnum, family);
  471         struct nhgrp_priv *nhg_priv;
  472         struct nh_control *ctl;
  473 
  474         if (rh == NULL) {
  475                 *perror = E2BIG;
  476                 return (NULL);
  477         }
  478 
  479         ctl = rh->nh_control;
  480 
  481         if (num_nhops > RIB_MAX_MPATH_WIDTH) {
  482                 *perror = E2BIG;
  483                 return (NULL);
  484         }
  485 
  486         if (ctl->gr_head.hash_size == 0) {
  487                 /* First multipath request. Bootstrap mpath datastructures. */
  488                 if (nhgrp_ctl_alloc_default(ctl, M_NOWAIT) == 0) {
  489                         *perror = ENOMEM;
  490                         return (NULL);
  491                 }
  492         }
  493 
  494         /* Sort nexthops & check there are no duplicates */
  495         sort_weightened_nhops(wn, num_nhops);
  496         uint32_t last_id = 0;
  497         for (int i = 0; i < num_nhops; i++) {
  498                 if (wn[i].nh->nh_priv->nh_control != ctl) {
  499                         *perror = EINVAL;
  500                         return (NULL);
  501                 }
  502                 if (wn[i].nh->nh_priv->nh_idx == last_id) {
  503                         *perror = EEXIST;
  504                         return (NULL);
  505                 }
  506                 last_id = wn[i].nh->nh_priv->nh_idx;
  507         }
  508 
  509         if ((nhg_priv = alloc_nhgrp(wn, num_nhops)) == NULL) {
  510                 *perror = ENOMEM;
  511                 return (NULL);
  512         }
  513         nhg_priv->nh_control = ctl;
  514 
  515         *perror = 0;
  516         return (nhg_priv->nhg);
  517 }
  518 
  519 /*
  520  * Finds an existing group matching @nhg or links @nhg to the tree.
  521  * Returns the referenced group or NULL and non-zero @perror.
  522  */
  523 struct nhgrp_object *
  524 nhgrp_get_nhgrp(struct nhgrp_object *nhg, int *perror)
  525 {
  526         struct nhgrp_priv *nhg_priv, *key = NHGRP_PRIV(nhg);
  527         struct nh_control *ctl = key->nh_control;
  528 
  529         nhg_priv = find_nhgrp(ctl, key);
  530         if (nhg_priv != NULL) {
  531                 /*
  532                  * Free originally-created group. As it hasn't been linked
  533                  *  and the dependent nexhops haven't been referenced, just free
  534                  *  the group.
  535                  */
  536                 destroy_nhgrp_int(key);
  537                 *perror = 0;
  538                 return (nhg_priv->nhg);
  539         } else {
  540                 /* No existing group, try to link the new one */
  541                 if (!ref_nhgrp_nhops(key)) {
  542                         /*
  543                          * Some of the nexthops have been scheduled for deletion.
  544                          * As the group hasn't been linked / no nexhops have been
  545                          *  referenced, call the final destructor immediately.
  546                          */
  547                         destroy_nhgrp_int(key);
  548                         *perror = EAGAIN;
  549                         return (NULL);
  550                 }
  551                 if (link_nhgrp(ctl, key) == 0) {
  552                         /* Unable to allocate index? */
  553                         *perror = EAGAIN;
  554                         free_nhgrp_nhops(key);
  555                         destroy_nhgrp_int(key);
  556                         return (NULL);
  557                 }
  558                 *perror = 0;
  559                 return (nhg);
  560         }
  561 
  562         /* NOTREACHED */
  563 }
  564 
  565 /*
  566  * Creates or looks up an existing nexthop group based on @wn and @num_nhops.
  567  *
  568  * Returns referenced nhop group or NULL, passing error code in @perror.
  569  */
  570 struct nhgrp_priv *
  571 get_nhgrp(struct nh_control *ctl, struct weightened_nhop *wn, int num_nhops,
  572     uint32_t uidx, int *perror)
  573 {
  574         struct nhgrp_object *nhg;
  575 
  576         nhg = nhgrp_alloc(ctl->ctl_rh->rib_fibnum, ctl->ctl_rh->rib_family,
  577             wn, num_nhops, perror);
  578         if (nhg == NULL)
  579                 return (NULL);
  580         nhgrp_set_uidx(nhg, uidx);
  581         nhg = nhgrp_get_nhgrp(nhg, perror);
  582         if (nhg != NULL)
  583                 return (NHGRP_PRIV(nhg));
  584         return (NULL);
  585 }
  586 
  587 
  588 /*
  589  * Appends one or more nexthops denoted by @wm to the nexthop group @gr_orig.
  590  *
  591  * Returns referenced nexthop group or NULL. In the latter case, @perror is
  592  *  filled with an error code.
  593  * Note that function does NOT care if the next nexthops already exists
  594  * in the @gr_orig. As a result, they will be added, resulting in the
  595  * same nexthop being present multiple times in the new group.
  596  */
  597 static struct nhgrp_priv *
  598 append_nhops(struct nh_control *ctl, const struct nhgrp_object *gr_orig,
  599     struct weightened_nhop *wn, int num_nhops, int *perror)
  600 {
  601         char storage[64];
  602         struct weightened_nhop *pnhops;
  603         struct nhgrp_priv *nhg_priv;
  604         const struct nhgrp_priv *src_priv;
  605         size_t sz;
  606         int curr_nhops;
  607 
  608         src_priv = NHGRP_PRIV_CONST(gr_orig);
  609         curr_nhops = src_priv->nhg_nh_count;
  610 
  611         *perror = 0;
  612 
  613         sz = (src_priv->nhg_nh_count + num_nhops) * (sizeof(struct weightened_nhop));
  614         /* optimize for <= 4 paths, each path=16 bytes */
  615         if (sz <= sizeof(storage))
  616                 pnhops = (struct weightened_nhop *)&storage[0];
  617         else {
  618                 pnhops = malloc(sz, M_TEMP, M_NOWAIT);
  619                 if (pnhops == NULL) {
  620                         *perror = ENOMEM;
  621                         return (NULL);
  622                 }
  623         }
  624 
  625         /* Copy nhops from original group first */
  626         memcpy(pnhops, src_priv->nhg_nh_weights,
  627           curr_nhops * sizeof(struct weightened_nhop));
  628         memcpy(&pnhops[curr_nhops], wn, num_nhops * sizeof(struct weightened_nhop));
  629         curr_nhops += num_nhops;
  630 
  631         nhg_priv = get_nhgrp(ctl, pnhops, curr_nhops, 0, perror);
  632 
  633         if (pnhops != (struct weightened_nhop *)&storage[0])
  634                 free(pnhops, M_TEMP);
  635 
  636         if (nhg_priv == NULL)
  637                 return (NULL);
  638 
  639         return (nhg_priv);
  640 }
  641 
  642 
  643 /*
  644  * Creates/finds nexthop group based on @wn and @num_nhops.
  645  * Returns 0 on success with referenced group in @rnd, or
  646  * errno.
  647  *
  648  * If the error is EAGAIN, then the operation can be retried.
  649  */
  650 int
  651 nhgrp_get_group(struct rib_head *rh, struct weightened_nhop *wn, int num_nhops,
  652     uint32_t uidx, struct nhgrp_object **pnhg)
  653 {
  654         struct nh_control *ctl = rh->nh_control;
  655         struct nhgrp_priv *nhg_priv;
  656         int error;
  657 
  658         nhg_priv = get_nhgrp(ctl, wn, num_nhops, uidx, &error);
  659         if (nhg_priv != NULL)
  660                 *pnhg = nhg_priv->nhg;
  661 
  662         return (error);
  663 }
  664 
  665 /*
  666  * Creates new nexthop group based on @src group without the nexthops
  667  * chosen by @flt_func.
  668  * Returns 0 on success, storring the reference nhop group/object in @rnd.
  669  */
  670 int
  671 nhgrp_get_filtered_group(struct rib_head *rh, const struct rtentry *rt,
  672     const struct nhgrp_object *src, rib_filter_f_t flt_func, void *flt_data,
  673     struct route_nhop_data *rnd)
  674 {
  675         char storage[64];
  676         struct nh_control *ctl = rh->nh_control;
  677         struct weightened_nhop *pnhops;
  678         const struct nhgrp_priv *mp_priv, *src_priv;
  679         size_t sz;
  680         int error, i, num_nhops;
  681 
  682         src_priv = NHGRP_PRIV_CONST(src);
  683 
  684         sz = src_priv->nhg_nh_count * (sizeof(struct weightened_nhop));
  685         /* optimize for <= 4 paths, each path=16 bytes */
  686         if (sz <= sizeof(storage))
  687                 pnhops = (struct weightened_nhop *)&storage[0];
  688         else {
  689                 if ((pnhops = malloc(sz, M_TEMP, M_NOWAIT)) == NULL)
  690                         return (ENOMEM);
  691         }
  692 
  693         /* Filter nexthops */
  694         error = 0;
  695         num_nhops = 0;
  696         for (i = 0; i < src_priv->nhg_nh_count; i++) {
  697                 if (flt_func(rt, src_priv->nhg_nh_weights[i].nh, flt_data))
  698                         continue;
  699                 memcpy(&pnhops[num_nhops++], &src_priv->nhg_nh_weights[i],
  700                   sizeof(struct weightened_nhop));
  701         }
  702 
  703         if (num_nhops == 0) {
  704                 rnd->rnd_nhgrp = NULL;
  705                 rnd->rnd_weight = 0;
  706         } else if (num_nhops == 1) {
  707                 rnd->rnd_nhop = pnhops[0].nh;
  708                 rnd->rnd_weight = pnhops[0].weight;
  709                 if (nhop_try_ref_object(rnd->rnd_nhop) == 0)
  710                         error = EAGAIN;
  711         } else {
  712                 mp_priv = get_nhgrp(ctl, pnhops, num_nhops, 0, &error);
  713                 if (mp_priv != NULL)
  714                         rnd->rnd_nhgrp = mp_priv->nhg;
  715                 rnd->rnd_weight = 0;
  716         }
  717 
  718         if (pnhops != (struct weightened_nhop *)&storage[0])
  719                 free(pnhops, M_TEMP);
  720 
  721         return (error);
  722 }
  723 
  724 /*
  725  * Creates new multipath group based on existing group/nhop in @rnd_orig and
  726  *  to-be-added nhop @wn_add.
  727  * Returns 0 on success and stores result in @rnd_new.
  728  */
  729 int
  730 nhgrp_get_addition_group(struct rib_head *rh, struct route_nhop_data *rnd_orig,
  731     struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_new)
  732 {
  733         struct nh_control *ctl = rh->nh_control;
  734         struct nhgrp_priv *nhg_priv;
  735         struct weightened_nhop wn[2] = {};
  736         int error;
  737 
  738         if (rnd_orig->rnd_nhop == NULL) {
  739                 /* No paths to add to, just reference current nhop */
  740                 *rnd_new = *rnd_add;
  741                 if (nhop_try_ref_object(rnd_new->rnd_nhop) == 0)
  742                         return (EAGAIN);
  743                 return (0);
  744         }
  745 
  746         wn[0].nh = rnd_add->rnd_nhop;
  747         wn[0].weight = rnd_add->rnd_weight;
  748 
  749         if (!NH_IS_NHGRP(rnd_orig->rnd_nhop)) {
  750                 /* Simple merge of 2 non-multipath nexthops */
  751                 wn[1].nh = rnd_orig->rnd_nhop;
  752                 wn[1].weight = rnd_orig->rnd_weight;
  753                 nhg_priv = get_nhgrp(ctl, wn, 2, 0, &error);
  754         } else {
  755                 /* Get new nhop group with @rt->rt_nhop as an additional nhop */
  756                 nhg_priv = append_nhops(ctl, rnd_orig->rnd_nhgrp, &wn[0], 1,
  757                     &error);
  758         }
  759 
  760         if (nhg_priv == NULL)
  761                 return (error);
  762         rnd_new->rnd_nhgrp = nhg_priv->nhg;
  763         rnd_new->rnd_weight = 0;
  764 
  765         return (0);
  766 }
  767 
  768 /*
  769  * Returns pointer to array of nexthops with weights for
  770  * given @nhg. Stores number of items in the array into @pnum_nhops.
  771  */
  772 const struct weightened_nhop *
  773 nhgrp_get_nhops(const struct nhgrp_object *nhg, uint32_t *pnum_nhops)
  774 {
  775         const struct nhgrp_priv *nhg_priv;
  776 
  777         KASSERT(((nhg->nhg_flags & MPF_MULTIPATH) != 0), ("nhop is not mpath"));
  778 
  779         nhg_priv = NHGRP_PRIV_CONST(nhg);
  780         *pnum_nhops = nhg_priv->nhg_nh_count;
  781 
  782         return (nhg_priv->nhg_nh_weights);
  783 }
  784 
  785 void
  786 nhgrp_set_uidx(struct nhgrp_object *nhg, uint32_t uidx)
  787 {
  788         struct nhgrp_priv *nhg_priv;
  789 
  790         KASSERT(((nhg->nhg_flags & MPF_MULTIPATH) != 0), ("nhop is not mpath"));
  791 
  792         nhg_priv = NHGRP_PRIV(nhg);
  793 
  794         nhg_priv->nhg_uidx = uidx;
  795 }
  796 
  797 uint32_t
  798 nhgrp_get_uidx(const struct nhgrp_object *nhg)
  799 {
  800         const struct nhgrp_priv *nhg_priv;
  801 
  802         KASSERT(((nhg->nhg_flags & MPF_MULTIPATH) != 0), ("nhop is not mpath"));
  803 
  804         nhg_priv = NHGRP_PRIV_CONST(nhg);
  805         return (nhg_priv->nhg_uidx);
  806 }
  807 
  808 /*
  809  * Prints nexhop group @nhg data in the provided @buf.
  810  * Example: nhg#33/sz=3:[#1:100,#2:100,#3:100]
  811  * Example: nhg#33/sz=5:[#1:100,#2:100,..]
  812  */
  813 char *
  814 nhgrp_print_buf(const struct nhgrp_object *nhg, char *buf, size_t bufsize)
  815 {
  816         const struct nhgrp_priv *nhg_priv = NHGRP_PRIV_CONST(nhg);
  817 
  818         int off = snprintf(buf, bufsize, "nhg#%u/sz=%u:[", nhg_priv->nhg_idx,
  819             nhg_priv->nhg_nh_count);
  820 
  821         for (int i = 0; i < nhg_priv->nhg_nh_count; i++) {
  822                 const struct weightened_nhop *wn = &nhg_priv->nhg_nh_weights[i];
  823                 int len = snprintf(&buf[off], bufsize - off, "#%u:%u,",
  824                     wn->nh->nh_priv->nh_idx, wn->weight);
  825                 if (len + off + 3 >= bufsize) {
  826                         int len = snprintf(&buf[off], bufsize - off, "...");
  827                         off += len;
  828                         break;
  829                 }
  830                 off += len;
  831         }
  832         if (off > 0)
  833                 off--; // remove last ","
  834         if (off + 1 < bufsize)
  835                 snprintf(&buf[off], bufsize - off, "]");
  836         return buf;
  837 }
  838 
  839 __noinline static int
  840 dump_nhgrp_entry(struct rib_head *rh, const struct nhgrp_priv *nhg_priv,
  841     char *buffer, size_t buffer_size, struct sysctl_req *w)
  842 {
  843         struct rt_msghdr *rtm;
  844         struct nhgrp_external *nhge;
  845         struct nhgrp_container *nhgc;
  846         const struct nhgrp_object *nhg;
  847         struct nhgrp_nhop_external *ext;
  848         int error;
  849         size_t sz;
  850 
  851         nhg = nhg_priv->nhg;
  852 
  853         sz = sizeof(struct rt_msghdr) + sizeof(struct nhgrp_external);
  854         /* controlplane nexthops */
  855         sz += sizeof(struct nhgrp_container);
  856         sz += sizeof(struct nhgrp_nhop_external) * nhg_priv->nhg_nh_count;
  857         /* dataplane nexthops */
  858         sz += sizeof(struct nhgrp_container);
  859         sz += sizeof(struct nhgrp_nhop_external) * nhg->nhg_size;
  860 
  861         KASSERT(sz <= buffer_size, ("increase nhgrp buffer size"));
  862 
  863         bzero(buffer, sz);
  864 
  865         rtm = (struct rt_msghdr *)buffer;
  866         rtm->rtm_msglen = sz;
  867         rtm->rtm_version = RTM_VERSION;
  868         rtm->rtm_type = RTM_GET;
  869 
  870         nhge = (struct nhgrp_external *)(rtm + 1);
  871 
  872         nhge->nhg_idx = nhg_priv->nhg_idx;
  873         nhge->nhg_refcount = nhg_priv->nhg_refcount;
  874 
  875         /* fill in control plane nexthops firs */
  876         nhgc = (struct nhgrp_container *)(nhge + 1);
  877         nhgc->nhgc_type = NHG_C_TYPE_CNHOPS;
  878         nhgc->nhgc_subtype = 0;
  879         nhgc->nhgc_len = sizeof(struct nhgrp_container);
  880         nhgc->nhgc_len += sizeof(struct nhgrp_nhop_external) * nhg_priv->nhg_nh_count;
  881         nhgc->nhgc_count = nhg_priv->nhg_nh_count;
  882 
  883         ext = (struct nhgrp_nhop_external *)(nhgc + 1);
  884         for (int i = 0; i < nhg_priv->nhg_nh_count; i++) {
  885                 ext[i].nh_idx = nhg_priv->nhg_nh_weights[i].nh->nh_priv->nh_idx;
  886                 ext[i].nh_weight = nhg_priv->nhg_nh_weights[i].weight;
  887         }
  888 
  889         /* fill in dataplane nexthops */
  890         nhgc = (struct nhgrp_container *)(&ext[nhg_priv->nhg_nh_count]);
  891         nhgc->nhgc_type = NHG_C_TYPE_DNHOPS;
  892         nhgc->nhgc_subtype = 0;
  893         nhgc->nhgc_len = sizeof(struct nhgrp_container);
  894         nhgc->nhgc_len += sizeof(struct nhgrp_nhop_external) * nhg->nhg_size;
  895         nhgc->nhgc_count = nhg->nhg_size;
  896 
  897         ext = (struct nhgrp_nhop_external *)(nhgc + 1);
  898         for (int i = 0; i < nhg->nhg_size; i++) {
  899                 ext[i].nh_idx = nhg->nhops[i]->nh_priv->nh_idx;
  900                 ext[i].nh_weight = 0;
  901         }
  902 
  903         error = SYSCTL_OUT(w, buffer, sz);
  904 
  905         return (error);
  906 }
  907 
  908 uint32_t
  909 nhgrp_get_idx(const struct nhgrp_object *nhg)
  910 {
  911         const struct nhgrp_priv *nhg_priv;
  912 
  913         nhg_priv = NHGRP_PRIV_CONST(nhg);
  914         return (nhg_priv->nhg_idx);
  915 }
  916 
  917 uint8_t
  918 nhgrp_get_origin(const struct nhgrp_object *nhg)
  919 {
  920         return (NHGRP_PRIV_CONST(nhg)->nhg_origin);
  921 }
  922 
  923 void
  924 nhgrp_set_origin(struct nhgrp_object *nhg, uint8_t origin)
  925 {
  926         NHGRP_PRIV(nhg)->nhg_origin = origin;
  927 }
  928 
  929 uint32_t
  930 nhgrp_get_count(struct rib_head *rh)
  931 {
  932         struct nh_control *ctl;
  933         uint32_t count;
  934 
  935         ctl = rh->nh_control;
  936 
  937         NHOPS_RLOCK(ctl);
  938         count = ctl->gr_head.items_count;
  939         NHOPS_RUNLOCK(ctl);
  940 
  941         return (count);
  942 }
  943 
  944 int
  945 nhgrp_dump_sysctl(struct rib_head *rh, struct sysctl_req *w)
  946 {
  947         struct nh_control *ctl = rh->nh_control;
  948         struct epoch_tracker et;
  949         struct nhgrp_priv *nhg_priv;
  950         char *buffer;
  951         size_t sz;
  952         int error = 0;
  953 
  954         if (ctl->gr_head.items_count == 0)
  955                 return (0);
  956 
  957         /* Calculate the maximum nhop group size in bytes */
  958         sz = sizeof(struct rt_msghdr) + sizeof(struct nhgrp_external);
  959         sz += 2 * sizeof(struct nhgrp_container);
  960         sz += 2 * sizeof(struct nhgrp_nhop_external) * RIB_MAX_MPATH_WIDTH;
  961         buffer = malloc(sz, M_TEMP, M_NOWAIT);
  962         if (buffer == NULL)
  963                 return (ENOMEM);
  964 
  965         NET_EPOCH_ENTER(et);
  966         NHOPS_RLOCK(ctl);
  967         CHT_SLIST_FOREACH(&ctl->gr_head, mpath, nhg_priv) {
  968                 error = dump_nhgrp_entry(rh, nhg_priv, buffer, sz, w);
  969                 if (error != 0)
  970                         break;
  971         } CHT_SLIST_FOREACH_END;
  972         NHOPS_RUNLOCK(ctl);
  973         NET_EPOCH_EXIT(et);
  974 
  975         free(buffer, M_TEMP);
  976 
  977         return (error);
  978 }

Cache object: e38c4d103b7556385b8fe16338f39ef9


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.