The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netlink/route/nexthop.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org>
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   25  * SUCH DAMAGE.
   26  */
   27 
   28 #include <sys/cdefs.h>
   29 __FBSDID("$FreeBSD$");
   30 #include "opt_inet.h"
   31 #include "opt_inet6.h"
   32 #include "opt_route.h"
   33 #include <sys/types.h>
   34 #include <sys/ck.h>
   35 #include <sys/epoch.h>
   36 #include <sys/kernel.h>
   37 #include <sys/malloc.h>
   38 #include <sys/rmlock.h>
   39 #include <sys/socket.h>
   40 
   41 #include <net/if.h>
   42 #include <net/route.h>
   43 #include <net/route/nhop.h>
   44 #include <net/route/nhop_utils.h>
   45 
   46 #include <net/route/route_ctl.h>
   47 #include <net/route/route_var.h>
   48 #include <netinet6/scope6_var.h>
   49 #include <netlink/netlink.h>
   50 #include <netlink/netlink_ctl.h>
   51 #include <netlink/netlink_route.h>
   52 #include <netlink/route/route_var.h>
   53 
   54 #define DEBUG_MOD_NAME  nl_nhop
   55 #define DEBUG_MAX_LEVEL LOG_DEBUG3
   56 #include <netlink/netlink_debug.h>
   57 _DECLARE_DEBUG(LOG_DEBUG3);
   58 
   59 /*
   60  * This file contains the logic to maintain kernel nexthops and
   61  *  nexthop groups based on the data provided by the user.
   62  *
   63  * Kernel stores (nearly) all of the routing data in the nexthops,
   64  *  including the prefix-specific flags (NHF_HOST and NHF_DEFAULT).
   65  *
   66  * Netlink API provides higher-level abstraction for the user. Each
   67  *  user-created nexthop may map to multiple kernel nexthops.
   68  *
   69  * The following variations require separate kernel nexthop to be
   70  *  created:
   71  *  * prefix flags (NHF_HOST, NHF_DEFAULT)
   72  *  * using IPv6 gateway for IPv4 routes
   73  *  * different fibnum
   74  *
   75  * These kernel nexthops have the lifetime bound to the lifetime of
   76  *  the user_nhop object. They are not collected until user requests
   77  *  to delete the created user_nhop.
   78  *
   79  */
/*
 * One entry of the user nexthop hash table.
 *
 * Entries are keyed by the (un_idx, un_fibfam) pair. An entry with
 * un_fibfam == 0 is the template ("master") record; entries cloned for a
 * particular fibnum/family are inserted with a non-zero un_fibfam and are
 * chained off their template through un_nextchild.
 */
struct user_nhop {
        uint32_t                        un_idx; /* Userland-provided index */
        uint32_t                        un_fibfam; /* fibnum+af(as highest byte) */
        uint8_t                         un_protocol; /* protocol that install the record */
        struct nhop_object              *un_nhop; /* "production" nexthop */
        struct nhop_object              *un_nhop_src; /* nexthop to copy from */
        struct weightened_nhop          *un_nhgrp_src; /* nexthops for nhg */
        uint32_t                        un_nhgrp_count; /* number of nexthops */
        struct user_nhop                *un_next; /* next item in hash chain */
        struct user_nhop                *un_nextchild; /* master -> children */
        struct epoch_context            un_epoch_ctx;   /* epoch ctl helper */
};
   92 
/*
 * Accessors the CHT_SLIST_* macros look up by the "unhop" prefix.
 */
/* produce hash value for an object */
#define unhop_hash_obj(_obj)    (hash_unhop(_obj))
/* compare two objects */
#define unhop_cmp(_one, _two)   (cmp_unhop(_one, _two))
/* next object accessor */
#define unhop_next(_obj)        (_obj)->un_next

/* Presumably provides struct unhop_head used by struct unhop_ctl below */
CHT_SLIST_DEFINE(unhop, struct user_nhop);
  101 
  102 struct unhop_ctl {
  103         struct unhop_head       un_head;
  104         struct rmlock           un_lock;
  105 };
  106 #define UN_LOCK_INIT(_ctl)      rm_init(&(_ctl)->un_lock, "unhop_ctl")
  107 #define UN_TRACKER              struct rm_priotracker un_tracker
  108 #define UN_RLOCK(_ctl)          rm_rlock(&((_ctl)->un_lock), &un_tracker)
  109 #define UN_RUNLOCK(_ctl)        rm_runlock(&((_ctl)->un_lock), &un_tracker)
  110 
  111 #define UN_WLOCK(_ctl)          rm_wlock(&(_ctl)->un_lock);
  112 #define UN_WUNLOCK(_ctl)        rm_wunlock(&(_ctl)->un_lock);
  113 
  114 VNET_DEFINE_STATIC(struct unhop_ctl *, un_ctl) = NULL;
  115 #define V_un_ctl        VNET(un_ctl)
  116 
/* Forward declarations for helpers referenced before their definitions */
static void consider_resize(struct unhop_ctl *ctl, uint32_t new_size);
/* Hash table callbacks, reached through the unhop_cmp/unhop_hash_obj macros */
static int cmp_unhop(const struct user_nhop *a, const struct user_nhop *b);
static unsigned int hash_unhop(const struct user_nhop *obj);

static void destroy_unhop(struct user_nhop *unhop);
static struct nhop_object *clone_unhop(const struct user_nhop *unhop,
    uint32_t fibnum, int family, int nh_flags);
  124 
  125 static int
  126 cmp_unhop(const struct user_nhop *a, const struct user_nhop *b)
  127 {
  128         return (a->un_idx == b->un_idx && a->un_fibfam == b->un_fibfam);
  129 }
  130 
  131 /*
  132  * Hash callback: calculate hash of an object
  133  */
  134 static unsigned int
  135 hash_unhop(const struct user_nhop *obj)
  136 {
  137         return (obj->un_idx ^ obj->un_fibfam);
  138 }
  139 
/* True for a template record, i.e. one stored without a fibnum/family key */
#define UNHOP_IS_MASTER(_unhop) ((_unhop)->un_fibfam == 0)
  141 
  142 /*
  143  * Factory interface for creating matching kernel nexthops/nexthop groups
  144  *
  145  * @uidx: userland nexhop index used to create the nexthop
  146  * @fibnum: fibnum nexthop will be used in
  147  * @family: upper family nexthop will be used in
  148  * @nh_flags: desired nexthop prefix flags
  149  * @perror: pointer to store error to
  150  *
  151  * Returns referenced nexthop linked to @fibnum/@family rib on success.
  152  */
  153 struct nhop_object *
  154 nl_find_nhop(uint32_t fibnum, int family, uint32_t uidx,
  155     int nh_flags, int *perror)
  156 {
  157         struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl);
  158         UN_TRACKER;
  159 
  160         if (__predict_false(ctl == NULL))
  161                 return (NULL);
  162 
  163         struct user_nhop key= {
  164                 .un_idx = uidx,
  165                 .un_fibfam = fibnum  | ((uint32_t)family) << 24,
  166         };
  167         struct user_nhop *unhop;
  168 
  169         nh_flags = nh_flags & (NHF_HOST | NHF_DEFAULT);
  170 
  171         if (__predict_false(family == 0))
  172                 return (NULL);
  173 
  174         UN_RLOCK(ctl);
  175         CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop);
  176         if (unhop != NULL) {
  177                 struct nhop_object *nh = unhop->un_nhop;
  178                 UN_RLOCK(ctl);
  179                 *perror = 0;
  180                 nhop_ref_any(nh);
  181                 return (nh);
  182         }
  183 
  184         /*
  185          * Exact nexthop not found. Search for template nexthop to clone from.
  186          */
  187         key.un_fibfam = 0;
  188         CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop);
  189         if (unhop == NULL) {
  190                 UN_RUNLOCK(ctl);
  191                 *perror = ESRCH;
  192                 return (NULL);
  193         }
  194 
  195         UN_RUNLOCK(ctl);
  196 
  197         /* Create entry to insert first */
  198         struct user_nhop *un_new, *un_tmp;
  199         un_new = malloc(sizeof(struct user_nhop), M_NETLINK, M_NOWAIT | M_ZERO);
  200         if (un_new == NULL) {
  201                 *perror = ENOMEM;
  202                 return (NULL);
  203         }
  204         un_new->un_idx = uidx;
  205         un_new->un_fibfam = fibnum  | ((uint32_t)family) << 24;
  206 
  207         /* Relying on epoch to protect unhop here */
  208         un_new->un_nhop = clone_unhop(unhop, fibnum, family, nh_flags);
  209         if (un_new->un_nhop == NULL) {
  210                 free(un_new, M_NETLINK);
  211                 *perror = ENOMEM;
  212                 return (NULL);
  213         }
  214 
  215         /* Insert back and report */
  216         UN_WLOCK(ctl);
  217 
  218         /* First, find template record once again */
  219         CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop);
  220         if (unhop == NULL) {
  221                 /* Someone deleted the nexthop during the call */
  222                 UN_WUNLOCK(ctl);
  223                 *perror = ESRCH;
  224                 destroy_unhop(un_new);
  225                 return (NULL);
  226         }
  227 
  228         /* Second, check the direct match */
  229         CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, un_new, un_tmp);
  230         struct nhop_object *nh;
  231         if (un_tmp != NULL) {
  232                 /* Another thread already created the desired nextop, use it */
  233                 nh = un_tmp->un_nhop;
  234         } else {
  235                 /* Finally, insert the new nexthop and link it to the primary */
  236                 nh = un_new->un_nhop;
  237                 CHT_SLIST_INSERT_HEAD(&ctl->un_head, unhop, un_new);
  238                 un_new->un_nextchild = unhop->un_nextchild;
  239                 unhop->un_nextchild = un_new;
  240                 un_new = NULL;
  241                 NL_LOG(LOG_DEBUG2, "linked cloned nexthop %p", nh);
  242         }
  243 
  244         UN_WUNLOCK(ctl);
  245 
  246         if (un_new != NULL)
  247                 destroy_unhop(un_new);
  248 
  249         *perror = 0;
  250         nhop_ref_any(nh);
  251         return (nh);
  252 }
  253 
  254 static struct user_nhop *
  255 nl_find_base_unhop(struct unhop_ctl *ctl, uint32_t uidx)
  256 {
  257         struct user_nhop key= { .un_idx = uidx };
  258         struct user_nhop *unhop = NULL;
  259         UN_TRACKER;
  260 
  261         UN_RLOCK(ctl);
  262         CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop);
  263         UN_RUNLOCK(ctl);
  264 
  265         return (unhop);
  266 }
  267 
  268 #define MAX_STACK_NHOPS 4
  269 static struct nhop_object *
  270 clone_unhop(const struct user_nhop *unhop, uint32_t fibnum, int family, int nh_flags)
  271 {
  272 #ifdef ROUTE_MPATH
  273         const struct weightened_nhop *wn;
  274         struct weightened_nhop *wn_new, wn_base[MAX_STACK_NHOPS];
  275         uint32_t num_nhops;
  276 #endif
  277         struct nhop_object *nh = NULL;
  278         int error;
  279 
  280         if (unhop->un_nhop_src != NULL) {
  281                 IF_DEBUG_LEVEL(LOG_DEBUG2) {
  282                         char nhbuf[NHOP_PRINT_BUFSIZE];
  283                         nhop_print_buf_any(unhop->un_nhop_src, nhbuf, sizeof(nhbuf));
  284                         FIB_NH_LOG(LOG_DEBUG2, unhop->un_nhop_src,
  285                             "cloning nhop %s -> %u.%u flags 0x%X", nhbuf, fibnum,
  286                             family, nh_flags);
  287                 }
  288                 struct nhop_object *nh;
  289                 nh = nhop_alloc(fibnum, AF_UNSPEC);
  290                 if (nh == NULL)
  291                         return (NULL);
  292                 nhop_copy(nh, unhop->un_nhop_src);
  293                 /* Check that nexthop gateway is compatible with the new family */
  294                 if (!nhop_set_upper_family(nh, family)) {
  295                         nhop_free(nh);
  296                         return (NULL);
  297                 }
  298                 nhop_set_uidx(nh, unhop->un_idx);
  299                 nhop_set_pxtype_flag(nh, nh_flags);
  300                 return (nhop_get_nhop(nh, &error));
  301         }
  302 #ifdef ROUTE_MPATH
  303         wn = unhop->un_nhgrp_src;
  304         num_nhops = unhop->un_nhgrp_count;
  305 
  306         if (num_nhops > MAX_STACK_NHOPS) {
  307                 wn_new = malloc(num_nhops * sizeof(struct weightened_nhop), M_TEMP, M_NOWAIT);
  308                 if (wn_new == NULL)
  309                         return (NULL);
  310         } else
  311                 wn_new = wn_base;
  312 
  313         for (int i = 0; i < num_nhops; i++) {
  314                 uint32_t uidx = nhop_get_uidx(wn[i].nh);
  315                 MPASS(uidx != 0);
  316                 wn_new[i].nh = nl_find_nhop(fibnum, family, uidx, nh_flags, &error);
  317                 if (error != 0)
  318                         break;
  319                 wn_new[i].weight = wn[i].weight;
  320         }
  321 
  322         if (error == 0) {
  323                 struct rib_head *rh = nhop_get_rh(wn_new[0].nh);
  324                 struct nhgrp_object *nhg;
  325 
  326                 error = nhgrp_get_group(rh, wn_new, num_nhops, unhop->un_idx, &nhg);
  327                 nh = (struct nhop_object *)nhg;
  328         }
  329 
  330         if (wn_new != wn_base)
  331                 free(wn_new, M_TEMP);
  332 #endif
  333         return (nh);
  334 }
  335 
  336 static void
  337 destroy_unhop(struct user_nhop *unhop)
  338 {
  339         if (unhop->un_nhop != NULL)
  340                 nhop_free_any(unhop->un_nhop);
  341         if (unhop->un_nhop_src != NULL)
  342                 nhop_free_any(unhop->un_nhop_src);
  343         free(unhop, M_NETLINK);
  344 }
  345 
  346 static void
  347 destroy_unhop_epoch(epoch_context_t ctx)
  348 {
  349         struct user_nhop *unhop;
  350 
  351         unhop = __containerof(ctx, struct user_nhop, un_epoch_ctx);
  352 
  353         destroy_unhop(unhop);
  354 }
  355 
  356 static uint32_t
  357 find_spare_uidx(struct unhop_ctl *ctl)
  358 {
  359         struct user_nhop *unhop, key = {};
  360         uint32_t uidx = 0;
  361         UN_TRACKER;
  362 
  363         UN_RLOCK(ctl);
  364         /* This should return spare uid with 75% of 65k used in ~99/100 cases */
  365         for (int i = 0; i < 16; i++) {
  366                 key.un_idx = (arc4random() % 65536) + 65536 * 4;
  367                 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop);
  368                 if (unhop == NULL) {
  369                         uidx = key.un_idx;
  370                         break;
  371                 }
  372         }
  373         UN_RUNLOCK(ctl);
  374 
  375         return (uidx);
  376 }
  377 
  378 
/*
 * Actual netlink code
 */
/*
 * Bundle of state for producing netlink replies. In the code visible here
 * only the hdr member is filled (by delete_unhop()); the intended use of
 * the remaining members should be checked against their users.
 */
struct netlink_walkargs {
        struct nl_writer *nw;
        struct nlmsghdr hdr;
        struct nlpcb *so;
        int family;
        int error;
        int count;
        int dumped;
};
/* Jumps to the enclosing function's "enomem" label when _v is NULL */
#define ENOMEM_IF_NULL(_v)      if ((_v) == NULL) goto enomem
  392 
  393 static bool
  394 dump_nhgrp(const struct user_nhop *unhop, struct nlmsghdr *hdr,
  395     struct nl_writer *nw)
  396 {
  397 
  398         if (!nlmsg_reply(nw, hdr, sizeof(struct nhmsg)))
  399                 goto enomem;
  400 
  401         struct nhmsg *nhm = nlmsg_reserve_object(nw, struct nhmsg);
  402         nhm->nh_family = AF_UNSPEC;
  403         nhm->nh_scope = 0;
  404         nhm->nh_protocol = unhop->un_protocol;
  405         nhm->nh_flags = 0;
  406 
  407         nlattr_add_u32(nw, NHA_ID, unhop->un_idx);
  408         nlattr_add_u16(nw, NHA_GROUP_TYPE, NEXTHOP_GRP_TYPE_MPATH);
  409 
  410         struct weightened_nhop *wn = unhop->un_nhgrp_src;
  411         uint32_t num_nhops = unhop->un_nhgrp_count;
  412         /* TODO: a better API? */
  413         int nla_len = sizeof(struct nlattr);
  414         nla_len += NETLINK_ALIGN(num_nhops * sizeof(struct nexthop_grp));
  415         struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr);
  416         if (nla == NULL)
  417                 goto enomem;
  418         nla->nla_type = NHA_GROUP;
  419         nla->nla_len = nla_len;
  420         for (int i = 0; i < num_nhops; i++) {
  421                 struct nexthop_grp *grp = &((struct nexthop_grp *)(nla + 1))[i];
  422                 grp->id = nhop_get_uidx(wn[i].nh);
  423                 grp->weight = wn[i].weight;
  424                 grp->resvd1 = 0;
  425                 grp->resvd2 = 0;
  426         }
  427 
  428         if (nlmsg_end(nw))
  429                 return (true);
  430 enomem:
  431         NL_LOG(LOG_DEBUG, "error: unable to allocate attribute memory");
  432         nlmsg_abort(nw);
  433         return (false);
  434 }
  435 
  436 static bool
  437 dump_nhop(const struct user_nhop *unhop, struct nlmsghdr *hdr,
  438     struct nl_writer *nw)
  439 {
  440         struct nhop_object *nh = unhop->un_nhop_src;
  441 
  442         if (!nlmsg_reply(nw, hdr, sizeof(struct nhmsg)))
  443                 goto enomem;
  444 
  445         struct nhmsg *nhm = nlmsg_reserve_object(nw, struct nhmsg);
  446         ENOMEM_IF_NULL(nhm);
  447         nhm->nh_family = nhop_get_neigh_family(nh);
  448         nhm->nh_scope = 0; // XXX: what's that?
  449         nhm->nh_protocol = unhop->un_protocol;
  450         nhm->nh_flags = 0;
  451 
  452         nlattr_add_u32(nw, NHA_ID, unhop->un_idx);
  453         if (nh->nh_flags & NHF_BLACKHOLE) {
  454                 nlattr_add_flag(nw, NHA_BLACKHOLE);
  455                 goto done;
  456         }
  457         nlattr_add_u32(nw, NHA_OIF, nh->nh_ifp->if_index);
  458 
  459         switch (nh->gw_sa.sa_family) {
  460 #ifdef INET
  461         case AF_INET:
  462                 nlattr_add(nw, NHA_GATEWAY, 4, &nh->gw4_sa.sin_addr);
  463                 break;
  464 #endif
  465 #ifdef INET6
  466         case AF_INET6:
  467                 {
  468                         struct in6_addr addr = nh->gw6_sa.sin6_addr;
  469                         in6_clearscope(&addr);
  470                         nlattr_add(nw, NHA_GATEWAY, 16, &addr);
  471                         break;
  472                 }
  473 #endif
  474         }
  475 
  476 done:
  477         if (nlmsg_end(nw))
  478                 return (true);
  479 enomem:
  480         nlmsg_abort(nw);
  481         return (false);
  482 }
  483 
  484 static void
  485 dump_unhop(const struct user_nhop *unhop, struct nlmsghdr *hdr,
  486     struct nl_writer *nw)
  487 {
  488         if (unhop->un_nhop_src != NULL)
  489                 dump_nhop(unhop, hdr, nw);
  490         else
  491                 dump_nhgrp(unhop, hdr, nw);
  492 }
  493 
  494 static int
  495 delete_unhop(struct unhop_ctl *ctl, struct nlmsghdr *hdr, uint32_t uidx)
  496 {
  497         struct user_nhop *unhop_ret, *unhop_base, *unhop_chain;
  498 
  499         struct user_nhop key = { .un_idx = uidx };
  500 
  501         UN_WLOCK(ctl);
  502 
  503         CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop_base);
  504 
  505         if (unhop_base != NULL) {
  506                 CHT_SLIST_REMOVE(&ctl->un_head, unhop, unhop_base, unhop_ret);
  507                 IF_DEBUG_LEVEL(LOG_DEBUG2) {
  508                         char nhbuf[NHOP_PRINT_BUFSIZE];
  509                         nhop_print_buf_any(unhop_base->un_nhop, nhbuf, sizeof(nhbuf));
  510                         FIB_NH_LOG(LOG_DEBUG3, unhop_base->un_nhop,
  511                             "removed base nhop %u: %s", uidx, nhbuf);
  512                 }
  513                 /* Unlink all child nexhops as well, keeping the chain intact */
  514                 unhop_chain = unhop_base->un_nextchild;
  515                 while (unhop_chain != NULL) {
  516                         CHT_SLIST_REMOVE(&ctl->un_head, unhop, unhop_chain,
  517                             unhop_ret);
  518                         MPASS(unhop_chain == unhop_ret);
  519                         IF_DEBUG_LEVEL(LOG_DEBUG3) {
  520                                 char nhbuf[NHOP_PRINT_BUFSIZE];
  521                                 nhop_print_buf_any(unhop_chain->un_nhop,
  522                                     nhbuf, sizeof(nhbuf));
  523                                 FIB_NH_LOG(LOG_DEBUG3, unhop_chain->un_nhop,
  524                                     "removed child nhop %u: %s", uidx, nhbuf);
  525                         }
  526                         unhop_chain = unhop_chain->un_nextchild;
  527                 }
  528         }
  529 
  530         UN_WUNLOCK(ctl);
  531 
  532         if (unhop_base == NULL) {
  533                 NL_LOG(LOG_DEBUG, "unable to find unhop %u", uidx);
  534                 return (ENOENT);
  535         }
  536 
  537         /* Report nexthop deletion */
  538         struct netlink_walkargs wa = {
  539                 .hdr.nlmsg_pid = hdr->nlmsg_pid,
  540                 .hdr.nlmsg_seq = hdr->nlmsg_seq,
  541                 .hdr.nlmsg_flags = hdr->nlmsg_flags,
  542                 .hdr.nlmsg_type = NL_RTM_DELNEXTHOP,
  543         };
  544 
  545         struct nl_writer nw = {};
  546         if (!nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_ROUTE, RTNLGRP_NEXTHOP)) {
  547                 NL_LOG(LOG_DEBUG, "error allocating message writer");
  548                 return (ENOMEM);
  549         }
  550 
  551         dump_unhop(unhop_base, &wa.hdr, &nw);
  552         nlmsg_flush(&nw);
  553 
  554         while (unhop_base != NULL) {
  555                 unhop_chain = unhop_base->un_nextchild;
  556                 NET_EPOCH_CALL(destroy_unhop_epoch, &unhop_base->un_epoch_ctx);
  557                 unhop_base = unhop_chain;
  558         }
  559 
  560         return (0);
  561 }
  562 
  563 static void
  564 consider_resize(struct unhop_ctl *ctl, uint32_t new_size)
  565 {
  566         void *new_ptr = NULL;
  567         size_t alloc_size;
  568 
  569         if (new_size == 0)
  570                 return;
  571 
  572         if (new_size != 0) {
  573                 alloc_size = CHT_SLIST_GET_RESIZE_SIZE(new_size);
  574                 new_ptr = malloc(alloc_size, M_NETLINK, M_NOWAIT | M_ZERO);
  575                 if (new_ptr == NULL)
  576                         return;
  577         }
  578 
  579         NL_LOG(LOG_DEBUG, "resizing hash: %u -> %u", ctl->un_head.hash_size, new_size);
  580         UN_WLOCK(ctl);
  581         if (new_ptr != NULL) {
  582                 CHT_SLIST_RESIZE(&ctl->un_head, unhop, new_ptr, new_size);
  583         }
  584         UN_WUNLOCK(ctl);
  585 
  586 
  587         if (new_ptr != NULL)
  588                 free(new_ptr, M_NETLINK);
  589 }
  590 
  591 static bool __noinline
  592 vnet_init_unhops(void)
  593 {
  594         uint32_t num_buckets = 16;
  595         size_t alloc_size = CHT_SLIST_GET_RESIZE_SIZE(num_buckets);
  596 
  597         struct unhop_ctl *ctl = malloc(sizeof(struct unhop_ctl), M_NETLINK,
  598             M_NOWAIT | M_ZERO);
  599         if (ctl == NULL)
  600                 return (false);
  601 
  602         void *ptr = malloc(alloc_size, M_NETLINK, M_NOWAIT | M_ZERO);
  603         if (ptr == NULL) {
  604                 free(ctl, M_NETLINK);
  605                 return (false);
  606         }
  607         CHT_SLIST_INIT(&ctl->un_head, ptr, num_buckets);
  608         UN_LOCK_INIT(ctl);
  609 
  610         if (!atomic_cmpset_ptr((uintptr_t *)&V_un_ctl, (uintptr_t)NULL, (uintptr_t)ctl)) {
  611                 free(ptr, M_NETLINK);
  612                 free(ctl, M_NETLINK);
  613         }
  614 
  615         if (atomic_load_ptr(&V_un_ctl) == NULL)
  616                 return (false);
  617 
  618         NL_LOG(LOG_NOTICE, "UNHOPS init done");
  619 
  620         return (true);
  621 }
  622 
  623 static void
  624 vnet_destroy_unhops(const void *unused __unused)
  625 {
  626         struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl);
  627         struct user_nhop *unhop, *tmp;
  628 
  629         if (ctl == NULL)
  630                 return;
  631         V_un_ctl = NULL;
  632 
  633         /* Wait till all unhop users finish their reads */
  634         NET_EPOCH_WAIT();
  635 
  636         UN_WLOCK(ctl);
  637         CHT_SLIST_FOREACH_SAFE(&ctl->un_head, unhop, unhop, tmp) {
  638                 destroy_unhop(unhop);
  639         } CHT_SLIST_FOREACH_SAFE_END;
  640         UN_WUNLOCK(ctl);
  641 
  642         free(ctl->un_head.ptr, M_NETLINK);
  643         free(ctl, M_NETLINK);
  644 }
  645 VNET_SYSUNINIT(vnet_destroy_unhops, SI_SUB_PROTO_IF, SI_ORDER_ANY,
  646     vnet_destroy_unhops, NULL);
  647 
  648 static int
  649 nlattr_get_nhg(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target)
  650 {
  651         int error = 0;
  652 
  653         /* Verify attribute correctness */
  654         struct nexthop_grp *grp = NLA_DATA(nla);
  655         int data_len = NLA_DATA_LEN(nla);
  656 
  657         int count = data_len / sizeof(*grp);
  658         if (count == 0 || (count * sizeof(*grp) != data_len)) {
  659                 NL_LOG(LOG_DEBUG, "Invalid length for RTA_GROUP: %d", data_len);
  660                 return (EINVAL);
  661         }
  662 
  663         *((struct nlattr **)target) = nla;
  664         return (error);
  665 }
  666 
/*
 * Parsed form of a nexthop request. Filled by nhmsg_parser: the nha_*
 * members come from netlink attributes, the nh_* members from the fixed
 * struct nhmsg header.
 */
struct nl_parsed_nhop {
        uint32_t        nha_id;         /* NHA_ID */
        uint8_t         nha_blackhole;  /* NHA_BLACKHOLE flag */
        uint8_t         nha_groups;     /* NHA_GROUPS flag */
        struct ifnet    *nha_oif;       /* NHA_OIF */
        struct sockaddr *nha_gw;        /* NHA_GATEWAY */
        struct nlattr   *nha_group;     /* NHA_GROUP, raw attribute */
        uint8_t         nh_family;      /* nhmsg.nh_family */
        uint8_t         nh_protocol;    /* nhmsg.nh_protocol */
};
  677 
/* Offset helpers, local to the parser tables below */
#define _IN(_field)     offsetof(struct nhmsg, _field)
#define _OUT(_field)    offsetof(struct nl_parsed_nhop, _field)
/* Fixed-header fields copied from struct nhmsg into struct nl_parsed_nhop */
static const struct nlfield_parser nlf_p_nh[] = {
        { .off_in = _IN(nh_family), .off_out = _OUT(nh_family), .cb = nlf_get_u8 },
        { .off_in = _IN(nh_protocol), .off_out = _OUT(nh_protocol), .cb = nlf_get_u8 },
};

/* Attribute type -> destination member and conversion callback */
static const struct nlattr_parser nla_p_nh[] = {
        { .type = NHA_ID, .off = _OUT(nha_id), .cb = nlattr_get_uint32 },
        { .type = NHA_GROUP, .off = _OUT(nha_group), .cb = nlattr_get_nhg },
        { .type = NHA_BLACKHOLE, .off = _OUT(nha_blackhole), .cb = nlattr_get_flag },
        { .type = NHA_OIF, .off = _OUT(nha_oif), .cb = nlattr_get_ifp },
        { .type = NHA_GATEWAY, .off = _OUT(nha_gw), .cb = nlattr_get_ip },
        { .type = NHA_GROUPS, .off = _OUT(nha_groups), .cb = nlattr_get_flag },
};
#undef _IN
#undef _OUT
NL_DECLARE_PARSER(nhmsg_parser, struct nhmsg, nlf_p_nh, nla_p_nh);
  696 
  697 static bool
  698 eligible_nhg(const struct nhop_object *nh)
  699 {
  700         return (nh->nh_flags & NHF_GATEWAY);
  701 }
  702 
  703 static int
  704 newnhg(struct unhop_ctl *ctl, struct nl_parsed_nhop *attrs, struct user_nhop *unhop)
  705 {
  706         struct nexthop_grp *grp = NLA_DATA(attrs->nha_group);
  707         int count = NLA_DATA_LEN(attrs->nha_group) / sizeof(*grp);
  708         struct weightened_nhop *wn;
  709 
  710         wn = malloc(sizeof(*wn) * count, M_NETLINK, M_NOWAIT | M_ZERO);
  711         if (wn == NULL)
  712                 return (ENOMEM);
  713 
  714         for (int i = 0; i < count; i++) {
  715                 struct user_nhop *unhop;
  716                 unhop = nl_find_base_unhop(ctl, grp[i].id);
  717                 if (unhop == NULL) {
  718                         NL_LOG(LOG_DEBUG, "unable to find uidx %u", grp[i].id);
  719                         free(wn, M_NETLINK);
  720                         return (ESRCH);
  721                 } else if (unhop->un_nhop_src == NULL) {
  722                         NL_LOG(LOG_DEBUG, "uidx %u is a group, nested group unsupported",
  723                             grp[i].id);
  724                         free(wn, M_NETLINK);
  725                         return (ENOTSUP);
  726                 } else if (!eligible_nhg(unhop->un_nhop_src)) {
  727                         NL_LOG(LOG_DEBUG, "uidx %u nhop is not mpath-eligible",
  728                             grp[i].id);
  729                         free(wn, M_NETLINK);
  730                         return (ENOTSUP);
  731                 }
  732                 /*
  733                  * TODO: consider more rigid eligibility checks:
  734                  * restrict nexthops with the same gateway
  735                  */
  736                 wn[i].nh = unhop->un_nhop_src;
  737                 wn[i].weight = grp[i].weight;
  738         }
  739         unhop->un_nhgrp_src = wn;
  740         unhop->un_nhgrp_count = count;
  741         return (0);
  742 }
  743 
  744 static int
  745 newnhop(struct nl_parsed_nhop *attrs, struct user_nhop *unhop)
  746 {
  747         struct ifaddr *ifa = NULL;
  748         struct nhop_object *nh;
  749         int error;
  750 
  751         if (!attrs->nha_blackhole) {
  752                 if (attrs->nha_gw == NULL) {
  753                         NL_LOG(LOG_DEBUG, "missing NHA_GATEWAY");
  754                         return (EINVAL);
  755                 }
  756                 if (attrs->nha_oif == NULL) {
  757                         NL_LOG(LOG_DEBUG, "missing NHA_OIF");
  758                         return (EINVAL);
  759                 }
  760                 if (ifa == NULL)
  761                         ifa = ifaof_ifpforaddr(attrs->nha_gw, attrs->nha_oif);
  762                 if (ifa == NULL) {
  763                         NL_LOG(LOG_DEBUG, "Unable to determine default source IP");
  764                         return (EINVAL);
  765                 }
  766         }
  767 
  768         int family = attrs->nha_gw != NULL ? attrs->nha_gw->sa_family : attrs->nh_family;
  769 
  770         nh = nhop_alloc(RT_DEFAULT_FIB, family);
  771         if (nh == NULL) {
  772                 NL_LOG(LOG_DEBUG, "Unable to allocate nexthop");
  773                 return (ENOMEM);
  774         }
  775         nhop_set_uidx(nh, attrs->nha_id);
  776 
  777         if (attrs->nha_blackhole)
  778                 nhop_set_blackhole(nh, NHF_BLACKHOLE);
  779         else {
  780                 nhop_set_gw(nh, attrs->nha_gw, true);
  781                 nhop_set_transmit_ifp(nh, attrs->nha_oif);
  782                 nhop_set_src(nh, ifa);
  783         }
  784 
  785         error = nhop_get_unlinked(nh);
  786         if (error != 0) {
  787                 NL_LOG(LOG_DEBUG, "unable to finalize nexthop");
  788                 return (error);
  789         }
  790 
  791         IF_DEBUG_LEVEL(LOG_DEBUG2) {
  792                 char nhbuf[NHOP_PRINT_BUFSIZE];
  793                 nhop_print_buf(nh, nhbuf, sizeof(nhbuf));
  794                 NL_LOG(LOG_DEBUG2, "Adding unhop %u: %s", attrs->nha_id, nhbuf);
  795         }
  796 
  797         unhop->un_nhop_src = nh;
  798         return (0);
  799 }
  800 
  801 static int
  802 rtnl_handle_newnhop(struct nlmsghdr *hdr, struct nlpcb *nlp,
  803     struct nl_pstate *npt)
  804 {
  805         struct user_nhop *unhop;
  806         int error;
  807 
  808         if ((__predict_false(V_un_ctl == NULL)) && (!vnet_init_unhops()))
  809                 return (ENOMEM);
  810         struct unhop_ctl *ctl = V_un_ctl;
  811 
  812         struct nl_parsed_nhop attrs = {};
  813         error = nl_parse_nlmsg(hdr, &nhmsg_parser, npt, &attrs);
  814         if (error != 0)
  815                 return (error);
  816 
  817         /*
  818          * Get valid nha_id. Treat nha_id == 0 (auto-assignment) as a second-class
  819          *  citizen.
  820          */
  821         if (attrs.nha_id == 0) {
  822                 attrs.nha_id = find_spare_uidx(ctl);
  823                 if (attrs.nha_id == 0) {
  824                         NL_LOG(LOG_DEBUG, "Unable to get spare uidx");
  825                         return (ENOSPC);
  826                 }
  827         }
  828 
  829         NL_LOG(LOG_DEBUG, "IFINDEX %d", attrs.nha_oif ? attrs.nha_oif->if_index : 0);
  830 
  831         unhop = malloc(sizeof(struct user_nhop), M_NETLINK, M_NOWAIT | M_ZERO);
  832         if (unhop == NULL) {
  833                 NL_LOG(LOG_DEBUG, "Unable to allocate user_nhop");
  834                 return (ENOMEM);
  835         }
  836         unhop->un_idx = attrs.nha_id;
  837         unhop->un_protocol = attrs.nh_protocol;
  838 
  839         if (attrs.nha_group)
  840                 error = newnhg(ctl, &attrs, unhop);
  841         else
  842                 error = newnhop(&attrs, unhop);
  843 
  844         if (error != 0) {
  845                 free(unhop, M_NETLINK);
  846                 return (error);
  847         }
  848 
  849         UN_WLOCK(ctl);
  850         /* Check if uidx already exists */
  851         struct user_nhop *tmp = NULL;
  852         CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, unhop, tmp);
  853         if (tmp != NULL) {
  854                 UN_WUNLOCK(ctl);
  855                 NL_LOG(LOG_DEBUG, "nhop idx %u already exists", attrs.nha_id);
  856                 destroy_unhop(unhop);
  857                 return (EEXIST);
  858         }
  859         CHT_SLIST_INSERT_HEAD(&ctl->un_head, unhop, unhop);
  860         uint32_t num_buckets_new = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->un_head);
  861         UN_WUNLOCK(ctl);
  862 
  863         /* Report addition of the next nexhop */
  864         struct netlink_walkargs wa = {
  865                 .hdr.nlmsg_pid = hdr->nlmsg_pid,
  866                 .hdr.nlmsg_seq = hdr->nlmsg_seq,
  867                 .hdr.nlmsg_flags = hdr->nlmsg_flags,
  868                 .hdr.nlmsg_type = NL_RTM_NEWNEXTHOP,
  869         };
  870 
  871         struct nl_writer nw = {};
  872         if (!nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_ROUTE, RTNLGRP_NEXTHOP)) {
  873                 NL_LOG(LOG_DEBUG, "error allocating message writer");
  874                 return (ENOMEM);
  875         }
  876 
  877         dump_unhop(unhop, &wa.hdr, &nw);
  878         nlmsg_flush(&nw);
  879 
  880         consider_resize(ctl, num_buckets_new);
  881 
  882         return (0);
  883 }
  884 
  885 static int
  886 rtnl_handle_delnhop(struct nlmsghdr *hdr, struct nlpcb *nlp,
  887     struct nl_pstate *npt)
  888 {
  889         struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl);
  890         int error;
  891 
  892         if (__predict_false(ctl == NULL))
  893                 return (ESRCH);
  894 
  895         struct nl_parsed_nhop attrs = {};
  896         error = nl_parse_nlmsg(hdr, &nhmsg_parser, npt, &attrs);
  897         if (error != 0)
  898                 return (error);
  899 
  900         if (attrs.nha_id == 0) {
  901                 NL_LOG(LOG_DEBUG, "NHA_ID not set");
  902                 return (EINVAL);
  903         }
  904 
  905         error = delete_unhop(ctl, hdr, attrs.nha_id);
  906 
  907         return (error);
  908 }
  909 
  910 static bool
  911 match_unhop(const struct nl_parsed_nhop *attrs, struct user_nhop *unhop)
  912 {
  913         if (attrs->nha_id != 0 && unhop->un_idx != attrs->nha_id)
  914                 return (false);
  915         if (attrs->nha_groups != 0 && unhop->un_nhgrp_src == NULL)
  916                 return (false);
  917         if (attrs->nha_oif != NULL &&
  918             (unhop->un_nhop_src == NULL || unhop->un_nhop_src->nh_ifp != attrs->nha_oif))
  919                 return (false);
  920 
  921         return (true);
  922 }
  923 
  924 static int
  925 rtnl_handle_getnhop(struct nlmsghdr *hdr, struct nlpcb *nlp,
  926     struct nl_pstate *npt)
  927 {
  928         struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl);
  929         struct user_nhop *unhop;
  930         UN_TRACKER;
  931         int error;
  932 
  933         if (__predict_false(ctl == NULL))
  934                 return (ESRCH);
  935 
  936         struct nl_parsed_nhop attrs = {};
  937         error = nl_parse_nlmsg(hdr, &nhmsg_parser, npt, &attrs);
  938         if (error != 0)
  939                 return (error);
  940 
  941         struct netlink_walkargs wa = {
  942                 .nw = npt->nw,
  943                 .hdr.nlmsg_pid = hdr->nlmsg_pid,
  944                 .hdr.nlmsg_seq = hdr->nlmsg_seq,
  945                 .hdr.nlmsg_flags = hdr->nlmsg_flags,
  946                 .hdr.nlmsg_type = NL_RTM_NEWNEXTHOP,
  947         };
  948 
  949         if (attrs.nha_id != 0) {
  950                 NL_LOG(LOG_DEBUG2, "searching for uidx %u", attrs.nha_id);
  951                 struct user_nhop key= { .un_idx = attrs.nha_id };
  952                 UN_RLOCK(ctl);
  953                 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop);
  954                 UN_RUNLOCK(ctl);
  955 
  956                 if (unhop == NULL)
  957                         return (ESRCH);
  958                 dump_unhop(unhop, &wa.hdr, wa.nw);
  959                 return (0);
  960         }
  961 
  962         UN_RLOCK(ctl);
  963         wa.hdr.nlmsg_flags |= NLM_F_MULTI;
  964         CHT_SLIST_FOREACH(&ctl->un_head, unhop, unhop) {
  965                 if (UNHOP_IS_MASTER(unhop) && match_unhop(&attrs, unhop))
  966                         dump_unhop(unhop, &wa.hdr, wa.nw);
  967         } CHT_SLIST_FOREACH_END;
  968         UN_RUNLOCK(ctl);
  969 
  970         if (wa.error == 0) {
  971                 if (!nlmsg_end_dump(wa.nw, wa.error, &wa.hdr))
  972                         return (ENOMEM);
  973         }
  974         return (0);
  975 }
  976 
  977 static const struct rtnl_cmd_handler cmd_handlers[] = {
  978         {
  979                 .cmd = NL_RTM_NEWNEXTHOP,
  980                 .name = "RTM_NEWNEXTHOP",
  981                 .cb = &rtnl_handle_newnhop,
  982                 .priv = PRIV_NET_ROUTE,
  983         },
  984         {
  985                 .cmd = NL_RTM_DELNEXTHOP,
  986                 .name = "RTM_DELNEXTHOP",
  987                 .cb = &rtnl_handle_delnhop,
  988                 .priv = PRIV_NET_ROUTE,
  989         },
  990         {
  991                 .cmd = NL_RTM_GETNEXTHOP,
  992                 .name = "RTM_GETNEXTHOP",
  993                 .cb = &rtnl_handle_getnhop,
  994         }
  995 };
  996 
  997 static const struct nlhdr_parser *all_parsers[] = { &nhmsg_parser };
  998 
  999 void
 1000 rtnl_nexthops_init(void)
 1001 {
 1002         NL_VERIFY_PARSERS(all_parsers);
 1003         rtnl_register_messages(cmd_handlers, NL_ARRAY_LEN(cmd_handlers));
 1004 }

Cache object: 5f9d50f2063ee824647d984d857249f1


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.