The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/net/route/route_ctl.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2020 Alexander V. Chernikov
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   25  * SUCH DAMAGE.
   26  */
   27 
   28 #include <sys/cdefs.h>
   29 __FBSDID("$FreeBSD$");
   30 #include "opt_inet.h"
   31 #include "opt_inet6.h"
   32 #include "opt_route.h"
   33 
   34 #include <sys/param.h>
   35 #include <sys/systm.h>
   36 #include <sys/malloc.h>
   37 #include <sys/mbuf.h>
   38 #include <sys/socket.h>
   39 #include <sys/sysctl.h>
   40 #include <sys/syslog.h>
   41 #include <sys/kernel.h>
   42 #include <sys/lock.h>
   43 #include <sys/rmlock.h>
   44 
   45 #include <net/if.h>
   46 #include <net/if_var.h>
   47 #include <net/if_private.h>
   48 #include <net/if_dl.h>
   49 #include <net/vnet.h>
   50 #include <net/route.h>
   51 #include <net/route/route_ctl.h>
   52 #include <net/route/route_var.h>
   53 #include <net/route/nhop_utils.h>
   54 #include <net/route/nhop.h>
   55 #include <net/route/nhop_var.h>
   56 #include <netinet/in.h>
   57 #include <netinet6/scope6_var.h>
   58 #include <netinet6/in6_var.h>
   59 
   60 #define DEBUG_MOD_NAME  route_ctl
   61 #define DEBUG_MAX_LEVEL LOG_DEBUG
   62 #include <net/route/route_debug.h>
   63 _DECLARE_DEBUG(LOG_INFO);
   64 
   65 /*
   66  * This file contains control plane routing tables functions.
   67  *
   68  * All functions assume they are called in net epoch.
   69  */
   70 
   71 union sockaddr_union {
   72         struct sockaddr         sa;
   73         struct sockaddr_in      sin;
   74         struct sockaddr_in6     sin6;
   75         char                    _buf[32];
   76 };
   77 
   78 static int add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
   79     struct rib_cmd_info *rc);
   80 static int change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
   81     struct rt_addrinfo *info, struct route_nhop_data *nhd_orig,
   82     struct rib_cmd_info *rc);
   83 
   84 static int add_route_flags(struct rib_head *rnh, struct rtentry *rt,
   85     struct route_nhop_data *rnd_add, int op_flags, struct rib_cmd_info *rc);
   86 #ifdef ROUTE_MPATH
   87 static int add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
   88     struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
   89     int op_flags, struct rib_cmd_info *rc);
   90 #endif
   91 
   92 static int add_route(struct rib_head *rnh, struct rtentry *rt,
   93     struct route_nhop_data *rnd, struct rib_cmd_info *rc);
   94 static int delete_route(struct rib_head *rnh, struct rtentry *rt,
   95     struct rib_cmd_info *rc);
   96 static int rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
   97     int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc);
   98 
   99 static int get_prio_from_info(const struct rt_addrinfo *info);
  100 static int nhop_get_prio(const struct nhop_object *nh);
  101 
  102 #ifdef ROUTE_MPATH
  103 static bool rib_can_multipath(struct rib_head *rh);
  104 #endif
  105 
  106 /* Per-vnet multipath routing configuration (net.route.multipath; writable only with ROUTE_MPATH) */
  107 SYSCTL_DECL(_net_route);
  108 #define V_rib_route_multipath   VNET(rib_route_multipath)
  109 #ifdef ROUTE_MPATH
  110 #define _MP_FLAGS       CTLFLAG_RW
  111 #else
  112 #define _MP_FLAGS       CTLFLAG_RD
  113 #endif
  114 VNET_DEFINE(u_int, rib_route_multipath) = 1;
  115 SYSCTL_UINT(_net_route, OID_AUTO, multipath, _MP_FLAGS | CTLFLAG_VNET,
  116     &VNET_NAME(rib_route_multipath), 0, "Enable route multipath");
  117 #undef _MP_FLAGS
  118 
  119 #ifdef ROUTE_MPATH
  120 VNET_DEFINE(u_int, fib_hash_outbound) = 0;
  121 SYSCTL_UINT(_net_route, OID_AUTO, hash_outbound, CTLFLAG_RD | CTLFLAG_VNET,
  122     &VNET_NAME(fib_hash_outbound), 0,
  123     "Compute flowid for locally-originated packets");
  124 
  125 /* Default entropy to add to the hash calculation for the outbound connections */
  126 uint8_t mpath_entropy_key[MPATH_ENTROPY_KEY_LEN] = {
  127         0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
  128         0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
  129         0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
  130         0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
  131         0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
  132 };
  133 #endif
  134 
  135 #if defined(INET) && defined(INET6)
  136 FEATURE(ipv4_rfc5549_support, "Route IPv4 packets via IPv6 nexthops");
  137 #define V_rib_route_ipv6_nexthop VNET(rib_route_ipv6_nexthop)
  138 VNET_DEFINE_STATIC(u_int, rib_route_ipv6_nexthop) = 1;
  139 SYSCTL_UINT(_net_route, OID_AUTO, ipv6_nexthop, CTLFLAG_RW | CTLFLAG_VNET,
  140     &VNET_NAME(rib_route_ipv6_nexthop), 0, "Enable IPv4 route via IPv6 Next Hop address");
  141 #endif
  142 
  143 /* Debug bits: parent node for the net.route.debug sysctl subtree */
  144 SYSCTL_NODE(_net_route, OID_AUTO, debug, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
  145 
  146 static struct rib_head *
  147 get_rnh(uint32_t fibnum, const struct rt_addrinfo *info)
  148 {
  149         struct rib_head *rnh;
  150         struct sockaddr *dst;
  151 
  152         KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum"));
  153 
  154         dst = info->rti_info[RTAX_DST];
  155         rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
  156 
  157         return (rnh);
  158 }
  159 
  160 #if defined(INET) && defined(INET6)
  161 bool
  162 rib_can_4o6_nhop(void)
  163 {
  164         return (!!V_rib_route_ipv6_nexthop);
  165 }
  166 #endif
  167 
  168 #ifdef ROUTE_MPATH
  169 static bool
  170 rib_can_multipath(struct rib_head *rh)
  171 {
  172         int result;
  173 
  174         CURVNET_SET(rh->rib_vnet);
  175         result = !!V_rib_route_multipath;
  176         CURVNET_RESTORE();
  177 
  178         return (result);
  179 }
  180 
  181 /*
  182  * Check is nhop is multipath-eligible.
  183  * Avoid nhops without gateways and redirects.
  184  *
  185  * Returns 1 for multipath-eligible nexthop,
  186  * 0 otherwise.
  187  */
  188 bool
  189 nhop_can_multipath(const struct nhop_object *nh)
  190 {
  191 
  192         if ((nh->nh_flags & NHF_MULTIPATH) != 0)
  193                 return (1);
  194         if ((nh->nh_flags & NHF_GATEWAY) == 0)
  195                 return (0);
  196         if ((nh->nh_flags & NHF_REDIRECT) != 0)
  197                 return (0);
  198 
  199         return (1);
  200 }
  201 #endif
  202 
  203 static int
  204 get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight)
  205 {
  206         uint32_t weight;
  207 
  208         if (info->rti_mflags & RTV_WEIGHT)
  209                 weight = info->rti_rmx->rmx_weight;
  210         else
  211                 weight = default_weight;
  212         /* Keep upper 1 byte for adm distance purposes */
  213         if (weight > RT_MAX_WEIGHT)
  214                 weight = RT_MAX_WEIGHT;
  215         else if (weight == 0)
  216                 weight = default_weight;
  217 
  218         return (weight);
  219 }
  220 
  221 /*
  222  * File-local concept for distingushing between the normal and
  223  * RTF_PINNED routes tha can override the "normal" one.
  224  */
  225 #define NH_PRIORITY_HIGH        2
  226 #define NH_PRIORITY_NORMAL      1
  227 static int
  228 get_prio_from_info(const struct rt_addrinfo *info)
  229 {
  230         if (info->rti_flags & RTF_PINNED)
  231                 return (NH_PRIORITY_HIGH);
  232         return (NH_PRIORITY_NORMAL);
  233 }
  234 
  235 static int
  236 nhop_get_prio(const struct nhop_object *nh)
  237 {
  238         if (NH_IS_PINNED(nh))
  239                 return (NH_PRIORITY_HIGH);
  240         return (NH_PRIORITY_NORMAL);
  241 }
  242 
  243 /*
  244  * Check if specified @gw matches gw data in the nexthop @nh.
  245  *
  246  * Returns true if matches, false otherwise.
  247  */
  248 bool
  249 match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw)
  250 {
  251 
  252         if (nh->gw_sa.sa_family != gw->sa_family)
  253                 return (false);
  254 
  255         switch (gw->sa_family) {
  256         case AF_INET:
  257                 return (nh->gw4_sa.sin_addr.s_addr ==
  258                     ((const struct sockaddr_in *)gw)->sin_addr.s_addr);
  259         case AF_INET6:
  260                 {
  261                         const struct sockaddr_in6 *gw6;
  262                         gw6 = (const struct sockaddr_in6 *)gw;
  263 
  264                         /*
  265                          * Currently (2020-09) IPv6 gws in kernel have their
  266                          * scope embedded. Once this becomes false, this code
  267                          * has to be revisited.
  268                          */
  269                         if (IN6_ARE_ADDR_EQUAL(&nh->gw6_sa.sin6_addr,
  270                             &gw6->sin6_addr))
  271                                 return (true);
  272                         return (false);
  273                 }
  274         case AF_LINK:
  275                 {
  276                         const struct sockaddr_dl *sdl;
  277                         sdl = (const struct sockaddr_dl *)gw;
  278                         return (nh->gwl_sa.sdl_index == sdl->sdl_index);
  279                 }
  280         default:
  281                 return (memcmp(&nh->gw_sa, gw, nh->gw_sa.sa_len) == 0);
  282         }
  283 
  284         /* NOTREACHED */
  285         return (false);
  286 }
  287 
  288 /*
  289  * Matches all nexthop with given @gw.
  290  * Can be used as rib_filter_f callback.
  291  */
  292 int
  293 rib_match_gw(const struct rtentry *rt, const struct nhop_object *nh, void *gw_sa)
  294 {
  295         const struct sockaddr *gw = (const struct sockaddr *)gw_sa;
  296 
  297         return (match_nhop_gw(nh, gw));
  298 }
  299 
  300 struct gw_filter_data {
  301         const struct sockaddr *gw;
  302         int count;
  303 };
  304 
  305 /*
  306  * Matches first occurence of the gateway provided in @gwd
  307  */
  308 static int
  309 match_gw_one(const struct rtentry *rt, const struct nhop_object *nh, void *_data)
  310 {
  311         struct gw_filter_data *gwd = (struct gw_filter_data *)_data;
  312 
  313         /* Return only first match to make rtsock happy */
  314         if (match_nhop_gw(nh, gwd->gw) && gwd->count++ == 0)
  315                 return (1);
  316         return (0);
  317 }
  318 
  319 /*
  320  * Checks if data in @info matches nexhop @nh.
  321  *
  322  * Returns 0 on success,
  323  * ESRCH if not matched,
  324  * ENOENT if filter function returned false
  325  */
  326 int
  327 check_info_match_nhop(const struct rt_addrinfo *info, const struct rtentry *rt,
  328     const struct nhop_object *nh)
  329 {
  330         const struct sockaddr *gw = info->rti_info[RTAX_GATEWAY];
  331 
  332         if (info->rti_filter != NULL) {
  333             if (info->rti_filter(rt, nh, info->rti_filterdata) == 0)
  334                     return (ENOENT);
  335             else
  336                     return (0);
  337         }
  338         if ((gw != NULL) && !match_nhop_gw(nh, gw))
  339                 return (ESRCH);
  340 
  341         return (0);
  342 }
  343 
  344 /*
  345  * Runs exact prefix match based on @dst and @netmask.
  346  * Returns matched @rtentry if found or NULL.
  347  * If rtentry was found, saves nexthop / weight value into @rnd.
  348  */
  349 static struct rtentry *
  350 lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst,
  351     const struct sockaddr *netmask, struct route_nhop_data *rnd)
  352 {
  353         struct rtentry *rt;
  354 
  355         RIB_LOCK_ASSERT(rnh);
  356 
  357         rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head);
  358         if (rt != NULL) {
  359                 rnd->rnd_nhop = rt->rt_nhop;
  360                 rnd->rnd_weight = rt->rt_weight;
  361         } else {
  362                 rnd->rnd_nhop = NULL;
  363                 rnd->rnd_weight = 0;
  364         }
  365 
  366         return (rt);
  367 }
  368 
  369 struct rtentry *
  370 lookup_prefix_rt(struct rib_head *rnh, const struct rtentry *rt,
  371     struct route_nhop_data *rnd)
  372 {
  373         return (lookup_prefix_bysa(rnh, rt_key_const(rt), rt_mask_const(rt), rnd));
  374 }
  375 
  376 /*
  377  * Runs exact prefix match based on dst/netmask from @info.
  378  * Assumes RIB lock is held.
  379  * Returns matched @rtentry if found or NULL.
  380  * If rtentry was found, saves nexthop / weight value into @rnd.
  381  */
  382 struct rtentry *
  383 lookup_prefix(struct rib_head *rnh, const struct rt_addrinfo *info,
  384     struct route_nhop_data *rnd)
  385 {
  386         struct rtentry *rt;
  387 
  388         rt = lookup_prefix_bysa(rnh, info->rti_info[RTAX_DST],
  389             info->rti_info[RTAX_NETMASK], rnd);
  390 
  391         return (rt);
  392 }
  393 
  394 static bool
  395 fill_pxmask_family(int family, int plen, struct sockaddr *_dst,
  396     struct sockaddr **pmask)
  397 {
  398         if (plen == -1) {
  399                 *pmask = NULL;
  400                 return (true);
  401         }
  402 
  403         switch (family) {
  404 #ifdef INET
  405         case AF_INET:
  406                 {
  407                         struct sockaddr_in *mask = (struct sockaddr_in *)(*pmask);
  408                         struct sockaddr_in *dst= (struct sockaddr_in *)_dst;
  409 
  410                         memset(mask, 0, sizeof(*mask));
  411                         mask->sin_family = family;
  412                         mask->sin_len = sizeof(*mask);
  413                         if (plen == 32)
  414                                 *pmask = NULL;
  415                         else if (plen > 32 || plen < 0)
  416                                 return (false);
  417                         else {
  418                                 uint32_t daddr, maddr;
  419                                 maddr = htonl(plen ? ~((1 << (32 - plen)) - 1) : 0);
  420                                 mask->sin_addr.s_addr = maddr;
  421                                 daddr = dst->sin_addr.s_addr;
  422                                 daddr = htonl(ntohl(daddr) & ntohl(maddr));
  423                                 dst->sin_addr.s_addr = daddr;
  424                         }
  425                         return (true);
  426                 }
  427                 break;
  428 #endif
  429 #ifdef INET6
  430         case AF_INET6:
  431                 {
  432                         struct sockaddr_in6 *mask = (struct sockaddr_in6 *)(*pmask);
  433                         struct sockaddr_in6 *dst = (struct sockaddr_in6 *)_dst;
  434 
  435                         memset(mask, 0, sizeof(*mask));
  436                         mask->sin6_family = family;
  437                         mask->sin6_len = sizeof(*mask);
  438                         if (plen == 128)
  439                                 *pmask = NULL;
  440                         else if (plen > 128 || plen < 0)
  441                                 return (false);
  442                         else {
  443                                 ip6_writemask(&mask->sin6_addr, plen);
  444                                 IN6_MASK_ADDR(&dst->sin6_addr, &mask->sin6_addr);
  445                         }
  446                         return (true);
  447                 }
  448                 break;
  449 #endif
  450         }
  451         return (false);
  452 }
  453 
  454 /*
  455  * Attempts to add @dst/plen prefix with nexthop/nexhopgroup data @rnd
  456  * to the routing table.
  457  *
  458  * @fibnum: rtable id to insert route to
  459  * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
  460  * @plen: prefix length (or -1 if host route or not applicable for AF)
  461  * @op_flags: combination of RTM_F_ flags
  462  * @rc: storage to report operation result
  463  *
  464  * Returns 0 on success.
  465  */
  466 int
  467 rib_add_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
  468     struct route_nhop_data *rnd, int op_flags, struct rib_cmd_info *rc)
  469 {
  470         union sockaddr_union mask_storage;
  471         struct sockaddr *netmask = &mask_storage.sa;
  472         struct rtentry *rt = NULL;
  473 
  474         NET_EPOCH_ASSERT();
  475 
  476         bzero(rc, sizeof(struct rib_cmd_info));
  477         rc->rc_cmd = RTM_ADD;
  478 
  479         struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
  480         if (rnh == NULL)
  481                 return (EAFNOSUPPORT);
  482 
  483         if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
  484                 FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
  485                 return (EINVAL);
  486         }
  487 
  488         if (op_flags & RTM_F_CREATE) {
  489                 if ((rt = rt_alloc(rnh, dst, netmask)) == NULL) {
  490                         FIB_RH_LOG(LOG_INFO, rnh, "rtentry allocation failed");
  491                         return (ENOMEM);
  492                 }
  493         }
  494 
  495         return (add_route_flags(rnh, rt, rnd, op_flags, rc));
  496 }
  497 
  498 /*
  499  * Attempts to delete @dst/plen prefix matching gateway @gw from the
  500  *  routing rable.
  501  *
  502  * @fibnum: rtable id to remove route from
  503  * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
  504  * @plen: prefix length (or -1 if host route or not applicable for AF)
  505  * @gw: gateway to match
  506  * @op_flags: combination of RTM_F_ flags
  507  * @rc: storage to report operation result
  508  *
  509  * Returns 0 on success.
  510  */
  511 int
  512 rib_del_route_px_gw(uint32_t fibnum, struct sockaddr *dst, int plen,
  513     const struct sockaddr *gw, int op_flags, struct rib_cmd_info *rc)
  514 {
  515         struct gw_filter_data gwd = { .gw = gw };
  516 
  517         return (rib_del_route_px(fibnum, dst, plen, match_gw_one, &gwd, op_flags, rc));
  518 }
  519 
  520 /*
  521  * Attempts to delete @dst/plen prefix matching @filter_func from the
  522  *  routing rable.
  523  *
  524  * @fibnum: rtable id to remove route from
  525  * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
  526  * @plen: prefix length (or -1 if host route or not applicable for AF)
  527  * @filter_func: func to be called for each nexthop of the prefix for matching
  528  * @filter_arg: argument to pass to @filter_func
  529  * @op_flags: combination of RTM_F_ flags
  530  * @rc: storage to report operation result
  531  *
  532  * Returns 0 on success.
  533  */
  534 int
  535 rib_del_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
  536     rib_filter_f_t *filter_func, void *filter_arg, int op_flags,
  537     struct rib_cmd_info *rc)
  538 {
  539         union sockaddr_union mask_storage;
  540         struct sockaddr *netmask = &mask_storage.sa;
  541         int error;
  542 
  543         NET_EPOCH_ASSERT();
  544 
  545         bzero(rc, sizeof(struct rib_cmd_info));
  546         rc->rc_cmd = RTM_DELETE;
  547 
  548         struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
  549         if (rnh == NULL)
  550                 return (EAFNOSUPPORT);
  551 
  552         if (dst->sa_len > sizeof(mask_storage)) {
  553                 FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too big: %d", dst->sa_len);
  554                 return (EINVAL);
  555         }
  556 
  557         if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
  558                 FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
  559                 return (EINVAL);
  560         }
  561 
  562         int prio = (op_flags & RTM_F_FORCE) ? NH_PRIORITY_HIGH : NH_PRIORITY_NORMAL;
  563 
  564         RIB_WLOCK(rnh);
  565         struct route_nhop_data rnd;
  566         struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
  567         if (rt != NULL) {
  568                 error = rt_delete_conditional(rnh, rt, prio, filter_func,
  569                     filter_arg, rc);
  570         } else
  571                 error = ESRCH;
  572         RIB_WUNLOCK(rnh);
  573 
  574         if (error != 0)
  575                 return (error);
  576 
  577         rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
  578 
  579         if (rc->rc_cmd == RTM_DELETE)
  580                 rt_free(rc->rc_rt);
  581 #ifdef ROUTE_MPATH
  582         else {
  583                 /*
  584                  * Deleting 1 path may result in RTM_CHANGE to
  585                  * a different mpath group/nhop.
  586                  * Free old mpath group.
  587                  */
  588                 nhop_free_any(rc->rc_nh_old);
  589         }
  590 #endif
  591 
  592         return (0);
  593 }
  594 
  595 /*
  596  * Tries to copy route @rt from one rtable to the rtable specified by @dst_rh.
  597  * @rt: route to copy.
  598  * @rnd_src: nhop and weight. Multipath routes are not supported
  599  * @rh_dst: target rtable.
  600  * @rc: operation result storage
  601  *
  602  * Return 0 on success.
  603  */
  604 int
  605 rib_copy_route(struct rtentry *rt, const struct route_nhop_data *rnd_src,
  606     struct rib_head *rh_dst, struct rib_cmd_info *rc)
  607 {
  608         struct nhop_object __diagused *nh_src = rnd_src->rnd_nhop;
  609         int error;
  610 
  611         MPASS((nh_src->nh_flags & NHF_MULTIPATH) == 0);
  612 
  613         IF_DEBUG_LEVEL(LOG_DEBUG2) {
  614                 char nhbuf[NHOP_PRINT_BUFSIZE], rtbuf[NHOP_PRINT_BUFSIZE];
  615                 nhop_print_buf_any(nh_src, nhbuf, sizeof(nhbuf));
  616                 rt_print_buf(rt, rtbuf, sizeof(rtbuf));
  617                 FIB_RH_LOG(LOG_DEBUG2, rh_dst, "copying %s -> %s from fib %u",
  618                     rtbuf, nhbuf, nhop_get_fibnum(nh_src));
  619         }
  620         struct nhop_object *nh = nhop_alloc(rh_dst->rib_fibnum, rh_dst->rib_family);
  621         if (nh == NULL) {
  622                 FIB_RH_LOG(LOG_INFO, rh_dst, "unable to allocate new nexthop");
  623                 return (ENOMEM);
  624         }
  625         nhop_copy(nh, rnd_src->rnd_nhop);
  626         nhop_set_origin(nh, nhop_get_origin(rnd_src->rnd_nhop));
  627         nhop_set_fibnum(nh, rh_dst->rib_fibnum);
  628         nh = nhop_get_nhop_internal(rh_dst, nh, &error);
  629         if (error != 0) {
  630                 FIB_RH_LOG(LOG_INFO, rh_dst,
  631                     "unable to finalize new nexthop: error %d", error);
  632                 return (ENOMEM);
  633         }
  634 
  635         struct rtentry *rt_new = rt_alloc(rh_dst, rt_key(rt), rt_mask(rt));
  636         if (rt_new == NULL) {
  637                 FIB_RH_LOG(LOG_INFO, rh_dst, "unable to create new rtentry");
  638                 nhop_free(nh);
  639                 return (ENOMEM);
  640         }
  641 
  642         struct route_nhop_data rnd = {
  643                 .rnd_nhop = nh,
  644                 .rnd_weight = rnd_src->rnd_weight
  645         };
  646         int op_flags = RTM_F_CREATE | (NH_IS_PINNED(nh) ? RTM_F_FORCE : 0);
  647         error = add_route_flags(rh_dst, rt_new, &rnd, op_flags, rc);
  648 
  649         if (error != 0) {
  650                 IF_DEBUG_LEVEL(LOG_DEBUG2) {
  651                         char buf[NHOP_PRINT_BUFSIZE];
  652                         rt_print_buf(rt_new, buf, sizeof(buf));
  653                         FIB_RH_LOG(LOG_DEBUG, rh_dst,
  654                             "Unable to add route %s: error %d", buf, error);
  655                 }
  656                 nhop_free(nh);
  657                 rt_free_immediate(rt_new);
  658         }
  659         return (error);
  660 }
  661 
  662 /*
  663  * Adds route defined by @info into the kernel table specified by @fibnum and
  664  * sa_family in @info->rti_info[RTAX_DST].
  665  *
  666  * Returns 0 on success and fills in operation metadata into @rc.
  667  */
  668 int
  669 rib_add_route(uint32_t fibnum, struct rt_addrinfo *info,
  670     struct rib_cmd_info *rc)
  671 {
  672         struct rib_head *rnh;
  673         int error;
  674 
  675         NET_EPOCH_ASSERT();
  676 
  677         rnh = get_rnh(fibnum, info);
  678         if (rnh == NULL)
  679                 return (EAFNOSUPPORT);
  680 
  681         /*
  682          * Check consistency between RTF_HOST flag and netmask
  683          * existence.
  684          */
  685         if (info->rti_flags & RTF_HOST)
  686                 info->rti_info[RTAX_NETMASK] = NULL;
  687         else if (info->rti_info[RTAX_NETMASK] == NULL) {
  688                 FIB_RH_LOG(LOG_DEBUG, rnh, "error: no RTF_HOST and empty netmask");
  689                 return (EINVAL);
  690         }
  691 
  692         bzero(rc, sizeof(struct rib_cmd_info));
  693         rc->rc_cmd = RTM_ADD;
  694 
  695         error = add_route_byinfo(rnh, info, rc);
  696         if (error == 0)
  697                 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
  698 
  699         return (error);
  700 }
  701 
  702 static int
  703 add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
  704     struct rib_cmd_info *rc)
  705 {
  706         struct route_nhop_data rnd_add;
  707         struct nhop_object *nh;
  708         struct rtentry *rt;
  709         struct sockaddr *dst, *gateway, *netmask;
  710         int error;
  711 
  712         dst = info->rti_info[RTAX_DST];
  713         gateway = info->rti_info[RTAX_GATEWAY];
  714         netmask = info->rti_info[RTAX_NETMASK];
  715 
  716         if ((info->rti_flags & RTF_GATEWAY) && !gateway) {
  717                 FIB_RH_LOG(LOG_DEBUG, rnh, "error: RTF_GATEWAY set with empty gw");
  718                 return (EINVAL);
  719         }
  720         if (dst && gateway && !nhop_check_gateway(dst->sa_family, gateway->sa_family)) {
  721                 FIB_RH_LOG(LOG_DEBUG, rnh,
  722                     "error: invalid dst/gateway family combination (%d, %d)",
  723                     dst->sa_family, gateway->sa_family);
  724                 return (EINVAL);
  725         }
  726 
  727         if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) {
  728                 FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large: %d",
  729                     dst->sa_len);
  730                 return (EINVAL);
  731         }
  732 
  733         if (info->rti_ifa == NULL) {
  734                 error = rt_getifa_fib(info, rnh->rib_fibnum);
  735                 if (error)
  736                         return (error);
  737         }
  738 
  739         if ((rt = rt_alloc(rnh, dst, netmask)) == NULL)
  740                 return (ENOBUFS);
  741 
  742         error = nhop_create_from_info(rnh, info, &nh);
  743         if (error != 0) {
  744                 rt_free_immediate(rt);
  745                 return (error);
  746         }
  747 
  748         rnd_add.rnd_nhop = nh;
  749         rnd_add.rnd_weight = get_info_weight(info, RT_DEFAULT_WEIGHT);
  750 
  751         int op_flags = RTM_F_CREATE;
  752         if (get_prio_from_info(info) == NH_PRIORITY_HIGH)
  753                 op_flags |= RTM_F_FORCE;
  754         else
  755                 op_flags |= RTM_F_APPEND;
  756         return (add_route_flags(rnh, rt, &rnd_add, op_flags, rc));
  757 
  758 }
  759 
  760 static int
  761 add_route_flags(struct rib_head *rnh, struct rtentry *rt, struct route_nhop_data *rnd_add,
  762     int op_flags, struct rib_cmd_info *rc)
  763 {
  764         struct route_nhop_data rnd_orig;
  765         struct nhop_object *nh;
  766         struct rtentry *rt_orig;
  767         int error = 0;
  768 
  769         nh = rnd_add->rnd_nhop;
  770 
  771         RIB_WLOCK(rnh);
  772 
  773         rt_orig = lookup_prefix_rt(rnh, rt, &rnd_orig);
  774 
  775         if (rt_orig == NULL) {
  776                 if (op_flags & RTM_F_CREATE)
  777                         error = add_route(rnh, rt, rnd_add, rc);
  778                 else
  779                         error = ESRCH; /* no entry but creation was not required */
  780                 RIB_WUNLOCK(rnh);
  781                 if (error != 0)
  782                         goto out;
  783                 return (0);
  784         }
  785 
  786         if (op_flags & RTM_F_EXCL) {
  787                 /* We have existing route in the RIB but not allowed to replace. */
  788                 RIB_WUNLOCK(rnh);
  789                 error = EEXIST;
  790                 goto out;
  791         }
  792 
  793         /* Now either append or replace */
  794         if (op_flags & RTM_F_REPLACE) {
  795                 if (nhop_get_prio(rnd_orig.rnd_nhop) > nhop_get_prio(rnd_add->rnd_nhop)) {
  796                         /* Old path is "better" (e.g. has PINNED flag set) */
  797                         RIB_WUNLOCK(rnh);
  798                         error = EEXIST;
  799                         goto out;
  800                 }
  801                 change_route(rnh, rt_orig, rnd_add, rc);
  802                 RIB_WUNLOCK(rnh);
  803                 nh = rc->rc_nh_old;
  804                 goto out;
  805         }
  806 
  807         RIB_WUNLOCK(rnh);
  808 
  809 #ifdef ROUTE_MPATH
  810         if ((op_flags & RTM_F_APPEND) && rib_can_multipath(rnh) &&
  811             nhop_can_multipath(rnd_add->rnd_nhop) &&
  812             nhop_can_multipath(rnd_orig.rnd_nhop)) {
  813 
  814                 for (int i = 0; i < RIB_MAX_RETRIES; i++) {
  815                         error = add_route_flags_mpath(rnh, rt_orig, rnd_add, &rnd_orig,
  816                             op_flags, rc);
  817                         if (error != EAGAIN)
  818                                 break;
  819                         RTSTAT_INC(rts_add_retry);
  820                 }
  821 
  822                 /*
  823                  *  Original nhop reference is unused in any case.
  824                  */
  825                 nhop_free_any(rnd_add->rnd_nhop);
  826                 if (op_flags & RTM_F_CREATE) {
  827                         if (error != 0 || rc->rc_cmd != RTM_ADD)
  828                                 rt_free_immediate(rt);
  829                 }
  830                 return (error);
  831         }
  832 #endif
  833         /* Out of options - free state and return error */
  834         error = EEXIST;
  835 out:
  836         if (op_flags & RTM_F_CREATE)
  837                 rt_free_immediate(rt);
  838         nhop_free_any(nh);
  839 
  840         return (error);
  841 }
  842 
  843 #ifdef ROUTE_MPATH
  844 static int
  845 add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
  846     struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
  847     int op_flags, struct rib_cmd_info *rc)
  848 {
  849         RIB_RLOCK_TRACKER;
  850         struct route_nhop_data rnd_new;
  851         int error = 0;
  852 
  853         error = nhgrp_get_addition_group(rnh, rnd_orig, rnd_add, &rnd_new);
  854         if (error != 0) {
  855                 if (error == EAGAIN) {
  856                         /*
  857                          * Group creation failed, most probably because
  858                          * @rnd_orig data got scheduled for deletion.
  859                          * Refresh @rnd_orig data and retry.
  860                          */
  861                         RIB_RLOCK(rnh);
  862                         lookup_prefix_rt(rnh, rt, rnd_orig);
  863                         RIB_RUNLOCK(rnh);
  864                         if (rnd_orig == NULL && !(op_flags & RTM_F_CREATE)) {
  865                                 /* In this iteration route doesn't exist */
  866                                 error = ENOENT;
  867                         }
  868                 }
  869                 return (error);
  870         }
  871         error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
  872         if (error != 0)
  873                 return (error);
  874 
  875         if (V_fib_hash_outbound == 0 && NH_IS_NHGRP(rc->rc_nh_new)) {
  876                 /*
  877                  * First multipath route got installed. Enable local
  878                  * outbound connections hashing.
  879                  */
  880                 if (bootverbose)
  881                         printf("FIB: enabled flowid calculation for locally-originated packets\n");
  882                 V_fib_hash_outbound = 1;
  883         }
  884 
  885         return (0);
  886 }
  887 #endif
  888 
  889 /*
  890  * Removes route defined by @info from the kernel table specified by @fibnum and
  891  * sa_family in @info->rti_info[RTAX_DST].
  892  *
  893  * Returns 0 on success and fills in operation metadata into @rc.
  894  */
  895 int
  896 rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc)
  897 {
  898         struct rib_head *rnh;
  899         struct sockaddr *dst, *netmask;
  900         struct sockaddr_storage mdst;
  901         int error;
  902 
  903         NET_EPOCH_ASSERT();
  904 
  905         rnh = get_rnh(fibnum, info);
  906         if (rnh == NULL)
  907                 return (EAFNOSUPPORT);
  908 
  909         bzero(rc, sizeof(struct rib_cmd_info));
  910         rc->rc_cmd = RTM_DELETE;
  911 
  912         dst = info->rti_info[RTAX_DST];
  913         netmask = info->rti_info[RTAX_NETMASK];
  914 
  915         if (netmask != NULL) {
  916                 /* Ensure @dst is always properly masked */
  917                 if (dst->sa_len > sizeof(mdst)) {
  918                         FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large");
  919                         return (EINVAL);
  920                 }
  921                 rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask);
  922                 dst = (struct sockaddr *)&mdst;
  923         }
  924 
  925         rib_filter_f_t *filter_func = NULL;
  926         void *filter_arg = NULL;
  927         struct gw_filter_data gwd = { .gw = info->rti_info[RTAX_GATEWAY] };
  928 
  929         if (info->rti_filter != NULL) {
  930                 filter_func = info->rti_filter;
  931                 filter_arg = info->rti_filterdata;
  932         } else if (gwd.gw != NULL) {
  933                 filter_func = match_gw_one;
  934                 filter_arg = &gwd;
  935         }
  936 
  937         int prio = get_prio_from_info(info);
  938 
  939         RIB_WLOCK(rnh);
  940         struct route_nhop_data rnd;
  941         struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
  942         if (rt != NULL) {
  943                 error = rt_delete_conditional(rnh, rt, prio, filter_func,
  944                     filter_arg, rc);
  945         } else
  946                 error = ESRCH;
  947         RIB_WUNLOCK(rnh);
  948 
  949         if (error != 0)
  950                 return (error);
  951 
  952         rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
  953 
  954         if (rc->rc_cmd == RTM_DELETE)
  955                 rt_free(rc->rc_rt);
  956 #ifdef ROUTE_MPATH
  957         else {
  958                 /*
  959                  * Deleting 1 path may result in RTM_CHANGE to
  960                  * a different mpath group/nhop.
  961                  * Free old mpath group.
  962                  */
  963                 nhop_free_any(rc->rc_nh_old);
  964         }
  965 #endif
  966 
  967         return (0);
  968 }
  969 
  970 /*
  971  * Conditionally unlinks rtentry paths from @rnh matching @cb.
  972  * Returns 0 on success with operation result stored in @rc.
  973  * On error, returns:
  974  * ESRCH - if prefix was not found or filter function failed to match
  975  * EADDRINUSE - if trying to delete higher priority route.
  976  */
  977 static int
  978 rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
  979     int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc)
  980 {
  981         struct nhop_object *nh = rt->rt_nhop;
  982 
  983 #ifdef ROUTE_MPATH
  984         if (NH_IS_NHGRP(nh)) {
  985                 struct nhgrp_object *nhg = (struct nhgrp_object *)nh;
  986                 struct route_nhop_data rnd;
  987                 int error;
  988 
  989                 if (cb == NULL)
  990                         return (ESRCH);
  991                 error = nhgrp_get_filtered_group(rnh, rt, nhg, cb, cbdata, &rnd);
  992                 if (error == 0) {
  993                         if (rnd.rnd_nhgrp == nhg) {
  994                                 /* No match, unreference new group and return. */
  995                                 nhop_free_any(rnd.rnd_nhop);
  996                                 return (ESRCH);
  997                         }
  998                         error = change_route(rnh, rt, &rnd, rc);
  999                 }
 1000                 return (error);
 1001         }
 1002 #endif
 1003         if (cb != NULL && !cb(rt, nh, cbdata))
 1004                 return (ESRCH);
 1005 
 1006         if (prio < nhop_get_prio(nh))
 1007                 return (EADDRINUSE);
 1008 
 1009         return (delete_route(rnh, rt, rc));
 1010 }
 1011 
 1012 int
 1013 rib_change_route(uint32_t fibnum, struct rt_addrinfo *info,
 1014     struct rib_cmd_info *rc)
 1015 {
 1016         RIB_RLOCK_TRACKER;
 1017         struct route_nhop_data rnd_orig;
 1018         struct rib_head *rnh;
 1019         struct rtentry *rt;
 1020         int error;
 1021 
 1022         NET_EPOCH_ASSERT();
 1023 
 1024         rnh = get_rnh(fibnum, info);
 1025         if (rnh == NULL)
 1026                 return (EAFNOSUPPORT);
 1027 
 1028         bzero(rc, sizeof(struct rib_cmd_info));
 1029         rc->rc_cmd = RTM_CHANGE;
 1030 
 1031         /* Check if updated gateway exists */
 1032         if ((info->rti_flags & RTF_GATEWAY) &&
 1033             (info->rti_info[RTAX_GATEWAY] == NULL)) {
 1034 
 1035                 /*
 1036                  * route(8) adds RTF_GATEWAY flag if -interface is not set.
 1037                  * Remove RTF_GATEWAY to enforce consistency and maintain
 1038                  * compatibility..
 1039                  */
 1040                 info->rti_flags &= ~RTF_GATEWAY;
 1041         }
 1042 
 1043         /*
 1044          * route change is done in multiple steps, with dropping and
 1045          * reacquiring lock. In the situations with multiple processes
 1046          * changes the same route in can lead to the case when route
 1047          * is changed between the steps. Address it by retrying the operation
 1048          * multiple times before failing.
 1049          */
 1050 
 1051         RIB_RLOCK(rnh);
 1052         rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
 1053             info->rti_info[RTAX_NETMASK], &rnh->head);
 1054 
 1055         if (rt == NULL) {
 1056                 RIB_RUNLOCK(rnh);
 1057                 return (ESRCH);
 1058         }
 1059 
 1060         rnd_orig.rnd_nhop = rt->rt_nhop;
 1061         rnd_orig.rnd_weight = rt->rt_weight;
 1062 
 1063         RIB_RUNLOCK(rnh);
 1064 
 1065         for (int i = 0; i < RIB_MAX_RETRIES; i++) {
 1066                 error = change_route_byinfo(rnh, rt, info, &rnd_orig, rc);
 1067                 if (error != EAGAIN)
 1068                         break;
 1069         }
 1070 
 1071         return (error);
 1072 }
 1073 
 1074 static int
 1075 change_nhop(struct rib_head *rnh, struct rt_addrinfo *info,
 1076     struct nhop_object *nh_orig, struct nhop_object **nh_new)
 1077 {
 1078         int error;
 1079 
 1080         /*
 1081          * New gateway could require new ifaddr, ifp;
 1082          * flags may also be different; ifp may be specified
 1083          * by ll sockaddr when protocol address is ambiguous
 1084          */
 1085         if (((nh_orig->nh_flags & NHF_GATEWAY) &&
 1086             info->rti_info[RTAX_GATEWAY] != NULL) ||
 1087             info->rti_info[RTAX_IFP] != NULL ||
 1088             (info->rti_info[RTAX_IFA] != NULL &&
 1089              !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) {
 1090                 error = rt_getifa_fib(info, rnh->rib_fibnum);
 1091 
 1092                 if (error != 0) {
 1093                         info->rti_ifa = NULL;
 1094                         return (error);
 1095                 }
 1096         }
 1097 
 1098         error = nhop_create_from_nhop(rnh, nh_orig, info, nh_new);
 1099         info->rti_ifa = NULL;
 1100 
 1101         return (error);
 1102 }
 1103 
 1104 #ifdef ROUTE_MPATH
 1105 static int
 1106 change_mpath_route(struct rib_head *rnh, struct rtentry *rt,
 1107     struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
 1108     struct rib_cmd_info *rc)
 1109 {
 1110         int error = 0, found_idx = 0;
 1111         struct nhop_object *nh_orig = NULL, *nh_new;
 1112         struct route_nhop_data rnd_new = {};
 1113         const struct weightened_nhop *wn = NULL;
 1114         struct weightened_nhop *wn_new;
 1115         uint32_t num_nhops;
 1116 
 1117         wn = nhgrp_get_nhops(rnd_orig->rnd_nhgrp, &num_nhops);
 1118         for (int i = 0; i < num_nhops; i++) {
 1119                 if (check_info_match_nhop(info, NULL, wn[i].nh) == 0) {
 1120                         nh_orig = wn[i].nh;
 1121                         found_idx = i;
 1122                         break;
 1123                 }
 1124         }
 1125 
 1126         if (nh_orig == NULL)
 1127                 return (ESRCH);
 1128 
 1129         error = change_nhop(rnh, info, nh_orig, &nh_new);
 1130         if (error != 0)
 1131                 return (error);
 1132 
 1133         wn_new = mallocarray(num_nhops, sizeof(struct weightened_nhop),
 1134             M_TEMP, M_NOWAIT | M_ZERO);
 1135         if (wn_new == NULL) {
 1136                 nhop_free(nh_new);
 1137                 return (EAGAIN);
 1138         }
 1139 
 1140         memcpy(wn_new, wn, num_nhops * sizeof(struct weightened_nhop));
 1141         wn_new[found_idx].nh = nh_new;
 1142         wn_new[found_idx].weight = get_info_weight(info, wn[found_idx].weight);
 1143 
 1144         error = nhgrp_get_group(rnh, wn_new, num_nhops, 0, &rnd_new.rnd_nhgrp);
 1145         nhop_free(nh_new);
 1146         free(wn_new, M_TEMP);
 1147 
 1148         if (error != 0)
 1149                 return (error);
 1150 
 1151         error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
 1152 
 1153         return (error);
 1154 }
 1155 #endif
 1156 
 1157 static int
 1158 change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
 1159     struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
 1160     struct rib_cmd_info *rc)
 1161 {
 1162         int error = 0;
 1163         struct nhop_object *nh_orig;
 1164         struct route_nhop_data rnd_new;
 1165 
 1166         nh_orig = rnd_orig->rnd_nhop;
 1167         if (nh_orig == NULL)
 1168                 return (ESRCH);
 1169 
 1170 #ifdef ROUTE_MPATH
 1171         if (NH_IS_NHGRP(nh_orig))
 1172                 return (change_mpath_route(rnh, rt, info, rnd_orig, rc));
 1173 #endif
 1174 
 1175         rnd_new.rnd_weight = get_info_weight(info, rnd_orig->rnd_weight);
 1176         error = change_nhop(rnh, info, nh_orig, &rnd_new.rnd_nhop);
 1177         if (error != 0)
 1178                 return (error);
 1179         error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
 1180 
 1181         return (error);
 1182 }
 1183 
 1184 /*
 1185  * Insert @rt with nhop data from @rnd_new to @rnh.
 1186  * Returns 0 on success and stores operation results in @rc.
 1187  */
 1188 static int
 1189 add_route(struct rib_head *rnh, struct rtentry *rt,
 1190     struct route_nhop_data *rnd, struct rib_cmd_info *rc)
 1191 {
 1192         struct radix_node *rn;
 1193 
 1194         RIB_WLOCK_ASSERT(rnh);
 1195 
 1196         rt->rt_nhop = rnd->rnd_nhop;
 1197         rt->rt_weight = rnd->rnd_weight;
 1198         rn = rnh->rnh_addaddr(rt_key(rt), rt_mask_const(rt), &rnh->head, rt->rt_nodes);
 1199 
 1200         if (rn != NULL) {
 1201                 if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
 1202                         tmproutes_update(rnh, rt, rnd->rnd_nhop);
 1203 
 1204                 /* Finalize notification */
 1205                 rib_bump_gen(rnh);
 1206                 rnh->rnh_prefixes++;
 1207 
 1208                 rc->rc_cmd = RTM_ADD;
 1209                 rc->rc_rt = rt;
 1210                 rc->rc_nh_old = NULL;
 1211                 rc->rc_nh_new = rnd->rnd_nhop;
 1212                 rc->rc_nh_weight = rnd->rnd_weight;
 1213 
 1214                 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
 1215                 return (0);
 1216         }
 1217 
 1218         /* Existing route or memory allocation failure. */
 1219         return (EEXIST);
 1220 }
 1221 
 1222 /*
 1223  * Unconditionally deletes @rt from @rnh.
 1224  */
 1225 static int
 1226 delete_route(struct rib_head *rnh, struct rtentry *rt, struct rib_cmd_info *rc)
 1227 {
 1228         RIB_WLOCK_ASSERT(rnh);
 1229 
 1230         /* Route deletion requested. */
 1231         struct radix_node *rn;
 1232 
 1233         rn = rnh->rnh_deladdr(rt_key_const(rt), rt_mask_const(rt), &rnh->head);
 1234         if (rn == NULL)
 1235                 return (ESRCH);
 1236         rt = RNTORT(rn);
 1237         rt->rte_flags &= ~RTF_UP;
 1238 
 1239         rib_bump_gen(rnh);
 1240         rnh->rnh_prefixes--;
 1241 
 1242         rc->rc_cmd = RTM_DELETE;
 1243         rc->rc_rt = rt;
 1244         rc->rc_nh_old = rt->rt_nhop;
 1245         rc->rc_nh_new = NULL;
 1246         rc->rc_nh_weight = rt->rt_weight;
 1247 
 1248         rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
 1249 
 1250         return (0);
 1251 }
 1252 
 1253 /*
 1254  * Switch @rt nhop/weigh to the ones specified in @rnd.
 1255  * Returns 0 on success.
 1256  */
 1257 int
 1258 change_route(struct rib_head *rnh, struct rtentry *rt,
 1259     struct route_nhop_data *rnd, struct rib_cmd_info *rc)
 1260 {
 1261         struct nhop_object *nh_orig;
 1262 
 1263         RIB_WLOCK_ASSERT(rnh);
 1264 
 1265         nh_orig = rt->rt_nhop;
 1266 
 1267         if (rnd->rnd_nhop == NULL)
 1268                 return (delete_route(rnh, rt, rc));
 1269 
 1270         /* Changing nexthop & weight to a new one */
 1271         rt->rt_nhop = rnd->rnd_nhop;
 1272         rt->rt_weight = rnd->rnd_weight;
 1273         if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
 1274                 tmproutes_update(rnh, rt, rnd->rnd_nhop);
 1275 
 1276         /* Finalize notification */
 1277         rib_bump_gen(rnh);
 1278         rc->rc_cmd = RTM_CHANGE;
 1279         rc->rc_rt = rt;
 1280         rc->rc_nh_old = nh_orig;
 1281         rc->rc_nh_new = rnd->rnd_nhop;
 1282         rc->rc_nh_weight = rnd->rnd_weight;
 1283 
 1284         rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
 1285 
 1286         return (0);
 1287 }
 1288 
 1289 /*
 1290  * Conditionally update route nhop/weight IFF data in @nhd_orig is
 1291  *  consistent with the current route data.
 1292  * Nexthop in @nhd_new is consumed.
 1293  */
 1294 int
 1295 change_route_conditional(struct rib_head *rnh, struct rtentry *rt,
 1296     struct route_nhop_data *rnd_orig, struct route_nhop_data *rnd_new,
 1297     struct rib_cmd_info *rc)
 1298 {
 1299         struct rtentry *rt_new;
 1300         int error = 0;
 1301 
 1302         IF_DEBUG_LEVEL(LOG_DEBUG2) {
 1303                 char buf_old[NHOP_PRINT_BUFSIZE], buf_new[NHOP_PRINT_BUFSIZE];
 1304                 nhop_print_buf_any(rnd_orig->rnd_nhop, buf_old, NHOP_PRINT_BUFSIZE);
 1305                 nhop_print_buf_any(rnd_new->rnd_nhop, buf_new, NHOP_PRINT_BUFSIZE);
 1306                 FIB_LOG(LOG_DEBUG2, rnh->rib_fibnum, rnh->rib_family,
 1307                     "trying change %s -> %s", buf_old, buf_new);
 1308         }
 1309         RIB_WLOCK(rnh);
 1310 
 1311         struct route_nhop_data rnd;
 1312         rt_new = lookup_prefix_rt(rnh, rt, &rnd);
 1313 
 1314         if (rt_new == NULL) {
 1315                 if (rnd_orig->rnd_nhop == NULL)
 1316                         error = add_route(rnh, rt, rnd_new, rc);
 1317                 else {
 1318                         /*
 1319                          * Prefix does not exist, which was not our assumption.
 1320                          * Update @rnd_orig with the new data and return
 1321                          */
 1322                         rnd_orig->rnd_nhop = NULL;
 1323                         rnd_orig->rnd_weight = 0;
 1324                         error = EAGAIN;
 1325                 }
 1326         } else {
 1327                 /* Prefix exists, try to update */
 1328                 if (rnd_orig->rnd_nhop == rt_new->rt_nhop) {
 1329                         /*
 1330                          * Nhop/mpath group hasn't changed. Flip
 1331                          * to the new precalculated one and return
 1332                          */
 1333                         error = change_route(rnh, rt_new, rnd_new, rc);
 1334                 } else {
 1335                         /* Update and retry */
 1336                         rnd_orig->rnd_nhop = rt_new->rt_nhop;
 1337                         rnd_orig->rnd_weight = rt_new->rt_weight;
 1338                         error = EAGAIN;
 1339                 }
 1340         }
 1341 
 1342         RIB_WUNLOCK(rnh);
 1343 
 1344         if (error == 0) {
 1345                 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
 1346 
 1347                 if (rnd_orig->rnd_nhop != NULL)
 1348                         nhop_free_any(rnd_orig->rnd_nhop);
 1349 
 1350         } else {
 1351                 if (rnd_new->rnd_nhop != NULL)
 1352                         nhop_free_any(rnd_new->rnd_nhop);
 1353         }
 1354 
 1355         return (error);
 1356 }
 1357 
 1358 /*
 1359  * Performs modification of routing table specificed by @action.
 1360  * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST].
 1361  * Needs to be run in network epoch.
 1362  *
 1363  * Returns 0 on success and fills in @rc with action result.
 1364  */
 1365 int
 1366 rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info,
 1367     struct rib_cmd_info *rc)
 1368 {
 1369         int error;
 1370 
 1371         switch (action) {
 1372         case RTM_ADD:
 1373                 error = rib_add_route(fibnum, info, rc);
 1374                 break;
 1375         case RTM_DELETE:
 1376                 error = rib_del_route(fibnum, info, rc);
 1377                 break;
 1378         case RTM_CHANGE:
 1379                 error = rib_change_route(fibnum, info, rc);
 1380                 break;
 1381         default:
 1382                 error = ENOTSUP;
 1383         }
 1384 
 1385         return (error);
 1386 }
 1387 
 1388 struct rt_delinfo
 1389 {
 1390         struct rib_head *rnh;
 1391         struct rtentry *head;
 1392         rib_filter_f_t *filter_f;
 1393         void *filter_arg;
 1394         int prio;
 1395         struct rib_cmd_info rc;
 1396 };
 1397 
 1398 /*
 1399  * Conditionally unlinks rtenties or paths from radix tree based
 1400  * on the callback data passed in @arg.
 1401  */
 1402 static int
 1403 rt_checkdelroute(struct radix_node *rn, void *arg)
 1404 {
 1405         struct rt_delinfo *di = (struct rt_delinfo *)arg;
 1406         struct rtentry *rt = (struct rtentry *)rn;
 1407 
 1408         if (rt_delete_conditional(di->rnh, rt, di->prio,
 1409             di->filter_f, di->filter_arg, &di->rc) != 0)
 1410                 return (0);
 1411 
 1412         /*
 1413          * Add deleted rtentries to the list to GC them
 1414          *  after dropping the lock.
 1415          *
 1416          * XXX: Delayed notifications not implemented
 1417          *  for nexthop updates.
 1418          */
 1419         if (di->rc.rc_cmd == RTM_DELETE) {
 1420                 /* Add to the list and return */
 1421                 rt->rt_chain = di->head;
 1422                 di->head = rt;
 1423 #ifdef ROUTE_MPATH
 1424         } else {
 1425                 /*
 1426                  * RTM_CHANGE to a different nexthop or nexthop group.
 1427                  * Free old multipath group.
 1428                  */
 1429                 nhop_free_any(di->rc.rc_nh_old);
 1430 #endif
 1431         }
 1432 
 1433         return (0);
 1434 }
 1435 
 1436 /*
 1437  * Iterates over a routing table specified by @fibnum and @family and
 1438  *  deletes elements marked by @filter_f.
 1439  * @fibnum: rtable id
 1440  * @family: AF_ address family
 1441  * @filter_f: function returning non-zero value for items to delete
 1442  * @arg: data to pass to the @filter_f function
 1443  * @report: true if rtsock notification is needed.
 1444  */
 1445 void
 1446 rib_walk_del(u_int fibnum, int family, rib_filter_f_t *filter_f, void *filter_arg,
 1447     bool report)
 1448 {
 1449         struct rib_head *rnh;
 1450         struct rtentry *rt;
 1451         struct nhop_object *nh;
 1452         struct epoch_tracker et;
 1453 
 1454         rnh = rt_tables_get_rnh(fibnum, family);
 1455         if (rnh == NULL)
 1456                 return;
 1457 
 1458         struct rt_delinfo di = {
 1459                 .rnh = rnh,
 1460                 .filter_f = filter_f,
 1461                 .filter_arg = filter_arg,
 1462                 .prio = NH_PRIORITY_NORMAL,
 1463         };
 1464 
 1465         NET_EPOCH_ENTER(et);
 1466 
 1467         RIB_WLOCK(rnh);
 1468         rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di);
 1469         RIB_WUNLOCK(rnh);
 1470 
 1471         /* We might have something to reclaim. */
 1472         bzero(&di.rc, sizeof(di.rc));
 1473         di.rc.rc_cmd = RTM_DELETE;
 1474         while (di.head != NULL) {
 1475                 rt = di.head;
 1476                 di.head = rt->rt_chain;
 1477                 rt->rt_chain = NULL;
 1478                 nh = rt->rt_nhop;
 1479 
 1480                 di.rc.rc_rt = rt;
 1481                 di.rc.rc_nh_old = nh;
 1482                 rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc);
 1483 
 1484                 if (report) {
 1485 #ifdef ROUTE_MPATH
 1486                         struct nhgrp_object *nhg;
 1487                         const struct weightened_nhop *wn;
 1488                         uint32_t num_nhops;
 1489                         if (NH_IS_NHGRP(nh)) {
 1490                                 nhg = (struct nhgrp_object *)nh;
 1491                                 wn = nhgrp_get_nhops(nhg, &num_nhops);
 1492                                 for (int i = 0; i < num_nhops; i++)
 1493                                         rt_routemsg(RTM_DELETE, rt, wn[i].nh, fibnum);
 1494                         } else
 1495 #endif
 1496                         rt_routemsg(RTM_DELETE, rt, nh, fibnum);
 1497                 }
 1498                 rt_free(rt);
 1499         }
 1500 
 1501         NET_EPOCH_EXIT(et);
 1502 }
 1503 
 1504 static int
 1505 rt_delete_unconditional(struct radix_node *rn, void *arg)
 1506 {
 1507         struct rtentry *rt = RNTORT(rn);
 1508         struct rib_head *rnh = (struct rib_head *)arg;
 1509 
 1510         rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), &rnh->head);
 1511         if (RNTORT(rn) == rt)
 1512                 rt_free(rt);
 1513 
 1514         return (0);
 1515 }
 1516 
 1517 /*
 1518  * Removes all routes from the routing table without executing notifications.
 1519  * rtentres will be removed after the end of a current epoch.
 1520  */
 1521 static void
 1522 rib_flush_routes(struct rib_head *rnh)
 1523 {
 1524         RIB_WLOCK(rnh);
 1525         rnh->rnh_walktree(&rnh->head, rt_delete_unconditional, rnh);
 1526         RIB_WUNLOCK(rnh);
 1527 }
 1528 
 1529 void
 1530 rib_flush_routes_family(int family)
 1531 {
 1532         struct rib_head *rnh;
 1533 
 1534         for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) {
 1535                 if ((rnh = rt_tables_get_rnh(fibnum, family)) != NULL)
 1536                         rib_flush_routes(rnh);
 1537         }
 1538 }
 1539 
 1540 const char *
 1541 rib_print_family(int family)
 1542 {
 1543         switch (family) {
 1544         case AF_INET:
 1545                 return ("inet");
 1546         case AF_INET6:
 1547                 return ("inet6");
 1548         case AF_LINK:
 1549                 return ("link");
 1550         }
 1551         return ("unknown");
 1552 }
 1553 

Cache object: 115658c4a708ed69417cde604b0cf1db


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.