1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2020 Alexander V. Chernikov
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 #include "opt_inet.h"
31 #include "opt_inet6.h"
32 #include "opt_route.h"
33
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/mbuf.h>
38 #include <sys/socket.h>
39 #include <sys/sysctl.h>
40 #include <sys/syslog.h>
41 #include <sys/kernel.h>
42 #include <sys/lock.h>
43 #include <sys/rmlock.h>
44
45 #include <net/if.h>
46 #include <net/if_var.h>
47 #include <net/if_private.h>
48 #include <net/if_dl.h>
49 #include <net/vnet.h>
50 #include <net/route.h>
51 #include <net/route/route_ctl.h>
52 #include <net/route/route_var.h>
53 #include <net/route/nhop_utils.h>
54 #include <net/route/nhop.h>
55 #include <net/route/nhop_var.h>
56 #include <netinet/in.h>
57 #include <netinet6/scope6_var.h>
58 #include <netinet6/in6_var.h>
59
60 #define DEBUG_MOD_NAME route_ctl
61 #define DEBUG_MAX_LEVEL LOG_DEBUG
62 #include <net/route/route_debug.h>
63 _DECLARE_DEBUG(LOG_INFO);
64
65 /*
66 * This file contains control plane routing tables functions.
67 *
68 * All functions assumes they are called in net epoch.
69 */
70
/*
 * On-stack scratch storage big enough for any sockaddr type handled
 * here (sockaddr_in6 is the largest at 28 bytes; _buf pads to 32).
 * Used for building netmasks without heap allocation.
 */
union sockaddr_union {
	struct sockaddr sa;
	struct sockaddr_in sin;
	struct sockaddr_in6 sin6;
	char _buf[32];	/* fixes the union footprint at 32 bytes */
};
77
78 static int add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
79 struct rib_cmd_info *rc);
80 static int change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
81 struct rt_addrinfo *info, struct route_nhop_data *nhd_orig,
82 struct rib_cmd_info *rc);
83
84 static int add_route_flags(struct rib_head *rnh, struct rtentry *rt,
85 struct route_nhop_data *rnd_add, int op_flags, struct rib_cmd_info *rc);
86 #ifdef ROUTE_MPATH
87 static int add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
88 struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
89 int op_flags, struct rib_cmd_info *rc);
90 #endif
91
92 static int add_route(struct rib_head *rnh, struct rtentry *rt,
93 struct route_nhop_data *rnd, struct rib_cmd_info *rc);
94 static int delete_route(struct rib_head *rnh, struct rtentry *rt,
95 struct rib_cmd_info *rc);
96 static int rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
97 int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc);
98
99 static int get_prio_from_info(const struct rt_addrinfo *info);
100 static int nhop_get_prio(const struct nhop_object *nh);
101
102 #ifdef ROUTE_MPATH
103 static bool rib_can_multipath(struct rib_head *rh);
104 #endif
105
/* Per-vnet multipath routing configuration */
SYSCTL_DECL(_net_route);
#define V_rib_route_multipath VNET(rib_route_multipath)
/* Without ROUTE_MPATH the knob is visible but read-only. */
#ifdef ROUTE_MPATH
#define _MP_FLAGS CTLFLAG_RW
#else
#define _MP_FLAGS CTLFLAG_RD
#endif
VNET_DEFINE(u_int, rib_route_multipath) = 1;
SYSCTL_UINT(_net_route, OID_AUTO, multipath, _MP_FLAGS | CTLFLAG_VNET,
    &VNET_NAME(rib_route_multipath), 0, "Enable route multipath");
#undef _MP_FLAGS

#ifdef ROUTE_MPATH
/*
 * Read-only: flipped on automatically by add_route_flags_mpath() when
 * the first multipath route gets installed.
 */
VNET_DEFINE(u_int, fib_hash_outbound) = 0;
SYSCTL_UINT(_net_route, OID_AUTO, hash_outbound, CTLFLAG_RD | CTLFLAG_VNET,
    &VNET_NAME(fib_hash_outbound), 0,
    "Compute flowid for locally-originated packets");

/* Default entropy to add to the hash calculation for the outbound connections*/
uint8_t mpath_entropy_key[MPATH_ENTROPY_KEY_LEN] = {
	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
};
#endif

#if defined(INET) && defined(INET6)
FEATURE(ipv4_rfc5549_support, "Route IPv4 packets via IPv6 nexthops");
#define V_rib_route_ipv6_nexthop VNET(rib_route_ipv6_nexthop)
VNET_DEFINE_STATIC(u_int, rib_route_ipv6_nexthop) = 1;
SYSCTL_UINT(_net_route, OID_AUTO, ipv6_nexthop, CTLFLAG_RW | CTLFLAG_VNET,
    &VNET_NAME(rib_route_ipv6_nexthop), 0, "Enable IPv4 route via IPv6 Next Hop address");
#endif

/* Debug bits */
SYSCTL_NODE(_net_route, OID_AUTO, debug, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
145
146 static struct rib_head *
147 get_rnh(uint32_t fibnum, const struct rt_addrinfo *info)
148 {
149 struct rib_head *rnh;
150 struct sockaddr *dst;
151
152 KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum"));
153
154 dst = info->rti_info[RTAX_DST];
155 rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
156
157 return (rnh);
158 }
159
160 #if defined(INET) && defined(INET6)
161 bool
162 rib_can_4o6_nhop(void)
163 {
164 return (!!V_rib_route_ipv6_nexthop);
165 }
166 #endif
167
168 #ifdef ROUTE_MPATH
169 static bool
170 rib_can_multipath(struct rib_head *rh)
171 {
172 int result;
173
174 CURVNET_SET(rh->rib_vnet);
175 result = !!V_rib_route_multipath;
176 CURVNET_RESTORE();
177
178 return (result);
179 }
180
181 /*
182 * Check is nhop is multipath-eligible.
183 * Avoid nhops without gateways and redirects.
184 *
185 * Returns 1 for multipath-eligible nexthop,
186 * 0 otherwise.
187 */
188 bool
189 nhop_can_multipath(const struct nhop_object *nh)
190 {
191
192 if ((nh->nh_flags & NHF_MULTIPATH) != 0)
193 return (1);
194 if ((nh->nh_flags & NHF_GATEWAY) == 0)
195 return (0);
196 if ((nh->nh_flags & NHF_REDIRECT) != 0)
197 return (0);
198
199 return (1);
200 }
201 #endif
202
203 static int
204 get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight)
205 {
206 uint32_t weight;
207
208 if (info->rti_mflags & RTV_WEIGHT)
209 weight = info->rti_rmx->rmx_weight;
210 else
211 weight = default_weight;
212 /* Keep upper 1 byte for adm distance purposes */
213 if (weight > RT_MAX_WEIGHT)
214 weight = RT_MAX_WEIGHT;
215 else if (weight == 0)
216 weight = default_weight;
217
218 return (weight);
219 }
220
221 /*
222 * File-local concept for distingushing between the normal and
223 * RTF_PINNED routes tha can override the "normal" one.
224 */
225 #define NH_PRIORITY_HIGH 2
226 #define NH_PRIORITY_NORMAL 1
227 static int
228 get_prio_from_info(const struct rt_addrinfo *info)
229 {
230 if (info->rti_flags & RTF_PINNED)
231 return (NH_PRIORITY_HIGH);
232 return (NH_PRIORITY_NORMAL);
233 }
234
235 static int
236 nhop_get_prio(const struct nhop_object *nh)
237 {
238 if (NH_IS_PINNED(nh))
239 return (NH_PRIORITY_HIGH);
240 return (NH_PRIORITY_NORMAL);
241 }
242
243 /*
244 * Check if specified @gw matches gw data in the nexthop @nh.
245 *
246 * Returns true if matches, false otherwise.
247 */
248 bool
249 match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw)
250 {
251
252 if (nh->gw_sa.sa_family != gw->sa_family)
253 return (false);
254
255 switch (gw->sa_family) {
256 case AF_INET:
257 return (nh->gw4_sa.sin_addr.s_addr ==
258 ((const struct sockaddr_in *)gw)->sin_addr.s_addr);
259 case AF_INET6:
260 {
261 const struct sockaddr_in6 *gw6;
262 gw6 = (const struct sockaddr_in6 *)gw;
263
264 /*
265 * Currently (2020-09) IPv6 gws in kernel have their
266 * scope embedded. Once this becomes false, this code
267 * has to be revisited.
268 */
269 if (IN6_ARE_ADDR_EQUAL(&nh->gw6_sa.sin6_addr,
270 &gw6->sin6_addr))
271 return (true);
272 return (false);
273 }
274 case AF_LINK:
275 {
276 const struct sockaddr_dl *sdl;
277 sdl = (const struct sockaddr_dl *)gw;
278 return (nh->gwl_sa.sdl_index == sdl->sdl_index);
279 }
280 default:
281 return (memcmp(&nh->gw_sa, gw, nh->gw_sa.sa_len) == 0);
282 }
283
284 /* NOTREACHED */
285 return (false);
286 }
287
288 /*
289 * Matches all nexthop with given @gw.
290 * Can be used as rib_filter_f callback.
291 */
292 int
293 rib_match_gw(const struct rtentry *rt, const struct nhop_object *nh, void *gw_sa)
294 {
295 const struct sockaddr *gw = (const struct sockaddr *)gw_sa;
296
297 return (match_nhop_gw(nh, gw));
298 }
299
/* State for match_gw_one(): gateway to match and matches seen so far. */
struct gw_filter_data {
	const struct sockaddr *gw;
	int count;	/* incremented on every gateway match */
};
304
305 /*
306 * Matches first occurence of the gateway provided in @gwd
307 */
308 static int
309 match_gw_one(const struct rtentry *rt, const struct nhop_object *nh, void *_data)
310 {
311 struct gw_filter_data *gwd = (struct gw_filter_data *)_data;
312
313 /* Return only first match to make rtsock happy */
314 if (match_nhop_gw(nh, gwd->gw) && gwd->count++ == 0)
315 return (1);
316 return (0);
317 }
318
319 /*
320 * Checks if data in @info matches nexhop @nh.
321 *
322 * Returns 0 on success,
323 * ESRCH if not matched,
324 * ENOENT if filter function returned false
325 */
326 int
327 check_info_match_nhop(const struct rt_addrinfo *info, const struct rtentry *rt,
328 const struct nhop_object *nh)
329 {
330 const struct sockaddr *gw = info->rti_info[RTAX_GATEWAY];
331
332 if (info->rti_filter != NULL) {
333 if (info->rti_filter(rt, nh, info->rti_filterdata) == 0)
334 return (ENOENT);
335 else
336 return (0);
337 }
338 if ((gw != NULL) && !match_nhop_gw(nh, gw))
339 return (ESRCH);
340
341 return (0);
342 }
343
344 /*
345 * Runs exact prefix match based on @dst and @netmask.
346 * Returns matched @rtentry if found or NULL.
347 * If rtentry was found, saves nexthop / weight value into @rnd.
348 */
349 static struct rtentry *
350 lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst,
351 const struct sockaddr *netmask, struct route_nhop_data *rnd)
352 {
353 struct rtentry *rt;
354
355 RIB_LOCK_ASSERT(rnh);
356
357 rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head);
358 if (rt != NULL) {
359 rnd->rnd_nhop = rt->rt_nhop;
360 rnd->rnd_weight = rt->rt_weight;
361 } else {
362 rnd->rnd_nhop = NULL;
363 rnd->rnd_weight = 0;
364 }
365
366 return (rt);
367 }
368
/*
 * Runs exact prefix match for @rt's dst/mask within @rnh, storing the
 * matched entry's nexthop/weight in @rnd.
 */
struct rtentry *
lookup_prefix_rt(struct rib_head *rnh, const struct rtentry *rt,
    struct route_nhop_data *rnd)
{
	const struct sockaddr *dst = rt_key_const(rt);
	const struct sockaddr *mask = rt_mask_const(rt);

	return (lookup_prefix_bysa(rnh, dst, mask, rnd));
}
375
376 /*
377 * Runs exact prefix match based on dst/netmask from @info.
378 * Assumes RIB lock is held.
379 * Returns matched @rtentry if found or NULL.
380 * If rtentry was found, saves nexthop / weight value into @rnd.
381 */
382 struct rtentry *
383 lookup_prefix(struct rib_head *rnh, const struct rt_addrinfo *info,
384 struct route_nhop_data *rnd)
385 {
386 struct rtentry *rt;
387
388 rt = lookup_prefix_bysa(rnh, info->rti_info[RTAX_DST],
389 info->rti_info[RTAX_NETMASK], rnd);
390
391 return (rt);
392 }
393
/*
 * Builds a proper netmask in *@pmask for @family/@plen (or sets *@pmask
 * to NULL for a host route) and clears any bits of @_dst that fall
 * outside the mask.
 *
 * Returns true on success, false when @plen is out of range for the
 * family or the family is unsupported.
 */
static bool
fill_pxmask_family(int family, int plen, struct sockaddr *_dst,
    struct sockaddr **pmask)
{
	/* plen == -1: host route / prefix length not applicable. */
	if (plen == -1) {
		*pmask = NULL;
		return (true);
	}

	switch (family) {
#ifdef INET
	case AF_INET:
	{
		struct sockaddr_in *mask = (struct sockaddr_in *)(*pmask);
		struct sockaddr_in *dst= (struct sockaddr_in *)_dst;

		memset(mask, 0, sizeof(*mask));
		mask->sin_family = family;
		mask->sin_len = sizeof(*mask);
		if (plen == 32)
			*pmask = NULL;	/* full-length prefix: no mask needed */
		else if (plen > 32 || plen < 0)
			return (false);
		else {
			uint32_t daddr, maddr;
			/*
			 * Build a contiguous /plen mask in network order.
			 * NOTE(review): for plen == 1 this computes 1 << 31
			 * on a signed int; 1U would be strictly conforming.
			 */
			maddr = htonl(plen ? ~((1 << (32 - plen)) - 1) : 0);
			mask->sin_addr.s_addr = maddr;
			/* Zero out dst bits not covered by the mask. */
			daddr = dst->sin_addr.s_addr;
			daddr = htonl(ntohl(daddr) & ntohl(maddr));
			dst->sin_addr.s_addr = daddr;
		}
		return (true);
	}
	break;
#endif
#ifdef INET6
	case AF_INET6:
	{
		struct sockaddr_in6 *mask = (struct sockaddr_in6 *)(*pmask);
		struct sockaddr_in6 *dst = (struct sockaddr_in6 *)_dst;

		memset(mask, 0, sizeof(*mask));
		mask->sin6_family = family;
		mask->sin6_len = sizeof(*mask);
		if (plen == 128)
			*pmask = NULL;	/* full-length prefix: no mask needed */
		else if (plen > 128 || plen < 0)
			return (false);
		else {
			ip6_writemask(&mask->sin6_addr, plen);
			IN6_MASK_ADDR(&dst->sin6_addr, &mask->sin6_addr);
		}
		return (true);
	}
	break;
#endif
	}
	/* Unsupported (or compiled-out) address family. */
	return (false);
}
453
454 /*
 * Attempts to add @dst/plen prefix with nexthop/nexthop group data @rnd
456 * to the routing table.
457 *
458 * @fibnum: rtable id to insert route to
459 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
460 * @plen: prefix length (or -1 if host route or not applicable for AF)
461 * @op_flags: combination of RTM_F_ flags
462 * @rc: storage to report operation result
463 *
464 * Returns 0 on success.
465 */
466 int
467 rib_add_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
468 struct route_nhop_data *rnd, int op_flags, struct rib_cmd_info *rc)
469 {
470 union sockaddr_union mask_storage;
471 struct sockaddr *netmask = &mask_storage.sa;
472 struct rtentry *rt = NULL;
473
474 NET_EPOCH_ASSERT();
475
476 bzero(rc, sizeof(struct rib_cmd_info));
477 rc->rc_cmd = RTM_ADD;
478
479 struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
480 if (rnh == NULL)
481 return (EAFNOSUPPORT);
482
483 if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
484 FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
485 return (EINVAL);
486 }
487
488 if (op_flags & RTM_F_CREATE) {
489 if ((rt = rt_alloc(rnh, dst, netmask)) == NULL) {
490 FIB_RH_LOG(LOG_INFO, rnh, "rtentry allocation failed");
491 return (ENOMEM);
492 }
493 }
494
495 return (add_route_flags(rnh, rt, rnd, op_flags, rc));
496 }
497
498 /*
499 * Attempts to delete @dst/plen prefix matching gateway @gw from the
 * routing table.
501 *
502 * @fibnum: rtable id to remove route from
503 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
504 * @plen: prefix length (or -1 if host route or not applicable for AF)
505 * @gw: gateway to match
506 * @op_flags: combination of RTM_F_ flags
507 * @rc: storage to report operation result
508 *
509 * Returns 0 on success.
510 */
511 int
512 rib_del_route_px_gw(uint32_t fibnum, struct sockaddr *dst, int plen,
513 const struct sockaddr *gw, int op_flags, struct rib_cmd_info *rc)
514 {
515 struct gw_filter_data gwd = { .gw = gw };
516
517 return (rib_del_route_px(fibnum, dst, plen, match_gw_one, &gwd, op_flags, rc));
518 }
519
520 /*
521 * Attempts to delete @dst/plen prefix matching @filter_func from the
 * routing table.
523 *
524 * @fibnum: rtable id to remove route from
525 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
526 * @plen: prefix length (or -1 if host route or not applicable for AF)
527 * @filter_func: func to be called for each nexthop of the prefix for matching
528 * @filter_arg: argument to pass to @filter_func
529 * @op_flags: combination of RTM_F_ flags
530 * @rc: storage to report operation result
531 *
532 * Returns 0 on success.
533 */
int
rib_del_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
    rib_filter_f_t *filter_func, void *filter_arg, int op_flags,
    struct rib_cmd_info *rc)
{
	union sockaddr_union mask_storage;
	struct sockaddr *netmask = &mask_storage.sa;
	int error;

	NET_EPOCH_ASSERT();

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_DELETE;

	struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	/* Guard the on-stack mask storage against oversized sockaddrs. */
	if (dst->sa_len > sizeof(mask_storage)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too big: %d", dst->sa_len);
		return (EINVAL);
	}

	if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
		return (EINVAL);
	}

	/* RTM_F_FORCE permits removal of high-priority (pinned) routes. */
	int prio = (op_flags & RTM_F_FORCE) ? NH_PRIORITY_HIGH : NH_PRIORITY_NORMAL;

	RIB_WLOCK(rnh);
	struct route_nhop_data rnd;
	struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
	if (rt != NULL) {
		error = rt_delete_conditional(rnh, rt, prio, filter_func,
		    filter_arg, rc);
	} else
		error = ESRCH;
	RIB_WUNLOCK(rnh);

	if (error != 0)
		return (error);

	/* Notify subscribers outside of the RIB lock. */
	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	if (rc->rc_cmd == RTM_DELETE)
		rt_free(rc->rc_rt);
#ifdef ROUTE_MPATH
	else {
		/*
		 * Deleting 1 path may result in RTM_CHANGE to
		 * a different mpath group/nhop.
		 * Free old mpath group.
		 */
		nhop_free_any(rc->rc_nh_old);
	}
#endif

	return (0);
}
594
595 /*
596 * Tries to copy route @rt from one rtable to the rtable specified by @dst_rh.
597 * @rt: route to copy.
598 * @rnd_src: nhop and weight. Multipath routes are not supported
599 * @rh_dst: target rtable.
600 * @rc: operation result storage
601 *
602 * Return 0 on success.
603 */
int
rib_copy_route(struct rtentry *rt, const struct route_nhop_data *rnd_src,
    struct rib_head *rh_dst, struct rib_cmd_info *rc)
{
	struct nhop_object __diagused *nh_src = rnd_src->rnd_nhop;
	int error;

	/* Only single-nexthop routes are supported (see function header). */
	MPASS((nh_src->nh_flags & NHF_MULTIPATH) == 0);

	IF_DEBUG_LEVEL(LOG_DEBUG2) {
		char nhbuf[NHOP_PRINT_BUFSIZE], rtbuf[NHOP_PRINT_BUFSIZE];
		nhop_print_buf_any(nh_src, nhbuf, sizeof(nhbuf));
		rt_print_buf(rt, rtbuf, sizeof(rtbuf));
		FIB_RH_LOG(LOG_DEBUG2, rh_dst, "copying %s -> %s from fib %u",
		    rtbuf, nhbuf, nhop_get_fibnum(nh_src));
	}
	/* Clone the source nexthop, re-homed to the destination fib. */
	struct nhop_object *nh = nhop_alloc(rh_dst->rib_fibnum, rh_dst->rib_family);
	if (nh == NULL) {
		FIB_RH_LOG(LOG_INFO, rh_dst, "unable to allocate new nexthop");
		return (ENOMEM);
	}
	nhop_copy(nh, rnd_src->rnd_nhop);
	nhop_set_origin(nh, nhop_get_origin(rnd_src->rnd_nhop));
	nhop_set_fibnum(nh, rh_dst->rib_fibnum);
	/*
	 * NOTE(review): assumes nhop_get_nhop_internal() consumes @nh on
	 * failure (no nhop_free() here) and that mapping @error to ENOMEM
	 * is intentional — confirm against the nhop KPI.
	 */
	nh = nhop_get_nhop_internal(rh_dst, nh, &error);
	if (error != 0) {
		FIB_RH_LOG(LOG_INFO, rh_dst,
		    "unable to finalize new nexthop: error %d", error);
		return (ENOMEM);
	}

	struct rtentry *rt_new = rt_alloc(rh_dst, rt_key(rt), rt_mask(rt));
	if (rt_new == NULL) {
		FIB_RH_LOG(LOG_INFO, rh_dst, "unable to create new rtentry");
		nhop_free(nh);
		return (ENOMEM);
	}

	struct route_nhop_data rnd = {
		.rnd_nhop = nh,
		.rnd_weight = rnd_src->rnd_weight
	};
	/* Pinned source routes may override existing routes in the target. */
	int op_flags = RTM_F_CREATE | (NH_IS_PINNED(nh) ? RTM_F_FORCE : 0);
	error = add_route_flags(rh_dst, rt_new, &rnd, op_flags, rc);

	if (error != 0) {
		IF_DEBUG_LEVEL(LOG_DEBUG2) {
			char buf[NHOP_PRINT_BUFSIZE];
			rt_print_buf(rt_new, buf, sizeof(buf));
			FIB_RH_LOG(LOG_DEBUG, rh_dst,
			    "Unable to add route %s: error %d", buf, error);
		}
		/*
		 * NOTE(review): add_route_flags() already frees @rt_new and
		 * the nexthop reference on failure when RTM_F_CREATE is set
		 * (see its 'out' label) — verify this does not double free.
		 */
		nhop_free(nh);
		rt_free_immediate(rt_new);
	}
	return (error);
}
661
662 /*
663 * Adds route defined by @info into the kernel table specified by @fibnum and
664 * sa_family in @info->rti_info[RTAX_DST].
665 *
666 * Returns 0 on success and fills in operation metadata into @rc.
667 */
668 int
669 rib_add_route(uint32_t fibnum, struct rt_addrinfo *info,
670 struct rib_cmd_info *rc)
671 {
672 struct rib_head *rnh;
673 int error;
674
675 NET_EPOCH_ASSERT();
676
677 rnh = get_rnh(fibnum, info);
678 if (rnh == NULL)
679 return (EAFNOSUPPORT);
680
681 /*
682 * Check consistency between RTF_HOST flag and netmask
683 * existence.
684 */
685 if (info->rti_flags & RTF_HOST)
686 info->rti_info[RTAX_NETMASK] = NULL;
687 else if (info->rti_info[RTAX_NETMASK] == NULL) {
688 FIB_RH_LOG(LOG_DEBUG, rnh, "error: no RTF_HOST and empty netmask");
689 return (EINVAL);
690 }
691
692 bzero(rc, sizeof(struct rib_cmd_info));
693 rc->rc_cmd = RTM_ADD;
694
695 error = add_route_byinfo(rnh, info, rc);
696 if (error == 0)
697 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
698
699 return (error);
700 }
701
/*
 * Builds an rtentry and nexthop from the validated @info and attempts
 * to insert them into @rnh.
 *
 * Returns 0 on success and stores the operation result in @rc.
 */
static int
add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	struct route_nhop_data rnd_add;
	struct nhop_object *nh;
	struct rtentry *rt;
	struct sockaddr *dst, *gateway, *netmask;
	int error;

	dst = info->rti_info[RTAX_DST];
	gateway = info->rti_info[RTAX_GATEWAY];
	netmask = info->rti_info[RTAX_NETMASK];

	/* RTF_GATEWAY without an actual gateway sockaddr is malformed. */
	if ((info->rti_flags & RTF_GATEWAY) && !gateway) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: RTF_GATEWAY set with empty gw");
		return (EINVAL);
	}
	if (dst && gateway && !nhop_check_gateway(dst->sa_family, gateway->sa_family)) {
		FIB_RH_LOG(LOG_DEBUG, rnh,
		    "error: invalid dst/gateway family combination (%d, %d)",
		    dst->sa_family, gateway->sa_family);
		return (EINVAL);
	}

	/* @dst has to fit into the rtentry-embedded destination buffer. */
	if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large: %d",
		    dst->sa_len);
		return (EINVAL);
	}

	/* Resolve the outgoing ifa/ifp when the caller did not supply one. */
	if (info->rti_ifa == NULL) {
		error = rt_getifa_fib(info, rnh->rib_fibnum);
		if (error)
			return (error);
	}

	if ((rt = rt_alloc(rnh, dst, netmask)) == NULL)
		return (ENOBUFS);

	error = nhop_create_from_info(rnh, info, &nh);
	if (error != 0) {
		rt_free_immediate(rt);
		return (error);
	}

	rnd_add.rnd_nhop = nh;
	rnd_add.rnd_weight = get_info_weight(info, RT_DEFAULT_WEIGHT);

	/* Pinned routes may override others; normal ones may be appended. */
	int op_flags = RTM_F_CREATE;
	if (get_prio_from_info(info) == NH_PRIORITY_HIGH)
		op_flags |= RTM_F_FORCE;
	else
		op_flags |= RTM_F_APPEND;
	return (add_route_flags(rnh, rt, &rnd_add, op_flags, rc));

}
759
/*
 * Inserts or updates prefix @rt with nexthop data @rnd_add in @rnh,
 * honouring @op_flags (RTM_F_CREATE / EXCL / REPLACE / APPEND / FORCE).
 *
 * On failure (and on paths where @rt / the nexthop reference end up
 * unused) both are released here — see the 'out' label.
 *
 * Returns 0 on success and stores the operation result in @rc.
 */
static int
add_route_flags(struct rib_head *rnh, struct rtentry *rt, struct route_nhop_data *rnd_add,
    int op_flags, struct rib_cmd_info *rc)
{
	struct route_nhop_data rnd_orig;
	struct nhop_object *nh;
	struct rtentry *rt_orig;
	int error = 0;

	nh = rnd_add->rnd_nhop;

	RIB_WLOCK(rnh);

	/* Check whether this prefix already exists. */
	rt_orig = lookup_prefix_rt(rnh, rt, &rnd_orig);

	if (rt_orig == NULL) {
		if (op_flags & RTM_F_CREATE)
			error = add_route(rnh, rt, rnd_add, rc);
		else
			error = ESRCH; /* no entry but creation was not required */
		RIB_WUNLOCK(rnh);
		if (error != 0)
			goto out;
		return (0);
	}

	if (op_flags & RTM_F_EXCL) {
		/* We have existing route in the RIB but not allowed to replace. */
		RIB_WUNLOCK(rnh);
		error = EEXIST;
		goto out;
	}

	/* Now either append or replace */
	if (op_flags & RTM_F_REPLACE) {
		if (nhop_get_prio(rnd_orig.rnd_nhop) > nhop_get_prio(rnd_add->rnd_nhop)) {
			/* Old path is "better" (e.g. has PINNED flag set) */
			RIB_WUNLOCK(rnh);
			error = EEXIST;
			goto out;
		}
		change_route(rnh, rt_orig, rnd_add, rc);
		RIB_WUNLOCK(rnh);
		/* Release the replaced nexthop and the unused new rtentry. */
		nh = rc->rc_nh_old;
		goto out;
	}

	RIB_WUNLOCK(rnh);

#ifdef ROUTE_MPATH
	/* Try to merge the new path into a multipath group. */
	if ((op_flags & RTM_F_APPEND) && rib_can_multipath(rnh) &&
	    nhop_can_multipath(rnd_add->rnd_nhop) &&
	    nhop_can_multipath(rnd_orig.rnd_nhop)) {

		/* Retry: concurrent updates may invalidate rnd_orig. */
		for (int i = 0; i < RIB_MAX_RETRIES; i++) {
			error = add_route_flags_mpath(rnh, rt_orig, rnd_add, &rnd_orig,
			    op_flags, rc);
			if (error != EAGAIN)
				break;
			RTSTAT_INC(rts_add_retry);
		}

		/*
		 * Original nhop reference is unused in any case.
		 */
		nhop_free_any(rnd_add->rnd_nhop);
		if (op_flags & RTM_F_CREATE) {
			if (error != 0 || rc->rc_cmd != RTM_ADD)
				rt_free_immediate(rt);
		}
		return (error);
	}
#endif
	/* Out of options - free state and return error */
	error = EEXIST;
out:
	if (op_flags & RTM_F_CREATE)
		rt_free_immediate(rt);
	nhop_free_any(nh);

	return (error);
}
842
843 #ifdef ROUTE_MPATH
844 static int
845 add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
846 struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
847 int op_flags, struct rib_cmd_info *rc)
848 {
849 RIB_RLOCK_TRACKER;
850 struct route_nhop_data rnd_new;
851 int error = 0;
852
853 error = nhgrp_get_addition_group(rnh, rnd_orig, rnd_add, &rnd_new);
854 if (error != 0) {
855 if (error == EAGAIN) {
856 /*
857 * Group creation failed, most probably because
858 * @rnd_orig data got scheduled for deletion.
859 * Refresh @rnd_orig data and retry.
860 */
861 RIB_RLOCK(rnh);
862 lookup_prefix_rt(rnh, rt, rnd_orig);
863 RIB_RUNLOCK(rnh);
864 if (rnd_orig == NULL && !(op_flags & RTM_F_CREATE)) {
865 /* In this iteration route doesn't exist */
866 error = ENOENT;
867 }
868 }
869 return (error);
870 }
871 error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
872 if (error != 0)
873 return (error);
874
875 if (V_fib_hash_outbound == 0 && NH_IS_NHGRP(rc->rc_nh_new)) {
876 /*
877 * First multipath route got installed. Enable local
878 * outbound connections hashing.
879 */
880 if (bootverbose)
881 printf("FIB: enabled flowid calculation for locally-originated packets\n");
882 V_fib_hash_outbound = 1;
883 }
884
885 return (0);
886 }
887 #endif
888
889 /*
890 * Removes route defined by @info from the kernel table specified by @fibnum and
891 * sa_family in @info->rti_info[RTAX_DST].
892 *
893 * Returns 0 on success and fills in operation metadata into @rc.
894 */
int
rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc)
{
	struct rib_head *rnh;
	struct sockaddr *dst, *netmask;
	struct sockaddr_storage mdst;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_DELETE;

	dst = info->rti_info[RTAX_DST];
	netmask = info->rti_info[RTAX_NETMASK];

	if (netmask != NULL) {
		/* Ensure @dst is always properly masked */
		if (dst->sa_len > sizeof(mdst)) {
			FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large");
			return (EINVAL);
		}
		rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask);
		dst = (struct sockaddr *)&mdst;
	}

	/*
	 * Pick the path filter: an explicit callback from @info wins;
	 * otherwise fall back to first-match-by-gateway when a gateway
	 * was supplied; otherwise delete unconditionally.
	 */
	rib_filter_f_t *filter_func = NULL;
	void *filter_arg = NULL;
	struct gw_filter_data gwd = { .gw = info->rti_info[RTAX_GATEWAY] };

	if (info->rti_filter != NULL) {
		filter_func = info->rti_filter;
		filter_arg = info->rti_filterdata;
	} else if (gwd.gw != NULL) {
		filter_func = match_gw_one;
		filter_arg = &gwd;
	}

	/* RTF_PINNED in @info allows removing pinned routes. */
	int prio = get_prio_from_info(info);

	RIB_WLOCK(rnh);
	struct route_nhop_data rnd;
	struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
	if (rt != NULL) {
		error = rt_delete_conditional(rnh, rt, prio, filter_func,
		    filter_arg, rc);
	} else
		error = ESRCH;
	RIB_WUNLOCK(rnh);

	if (error != 0)
		return (error);

	/* Notify subscribers outside of the RIB lock. */
	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	if (rc->rc_cmd == RTM_DELETE)
		rt_free(rc->rc_rt);
#ifdef ROUTE_MPATH
	else {
		/*
		 * Deleting 1 path may result in RTM_CHANGE to
		 * a different mpath group/nhop.
		 * Free old mpath group.
		 */
		nhop_free_any(rc->rc_nh_old);
	}
#endif

	return (0);
}
969
970 /*
971 * Conditionally unlinks rtentry paths from @rnh matching @cb.
972 * Returns 0 on success with operation result stored in @rc.
973 * On error, returns:
974 * ESRCH - if prefix was not found or filter function failed to match
975 * EADDRINUSE - if trying to delete higher priority route.
976 */
static int
rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
    int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc)
{
	struct nhop_object *nh = rt->rt_nhop;

#ifdef ROUTE_MPATH
	/*
	 * Multipath route: rebuild the nexthop group without the paths
	 * matched by @cb instead of deleting the whole prefix.
	 */
	if (NH_IS_NHGRP(nh)) {
		struct nhgrp_object *nhg = (struct nhgrp_object *)nh;
		struct route_nhop_data rnd;
		int error;

		/* A group delete without a filter makes no sense. */
		if (cb == NULL)
			return (ESRCH);
		error = nhgrp_get_filtered_group(rnh, rt, nhg, cb, cbdata, &rnd);
		if (error == 0) {
			if (rnd.rnd_nhgrp == nhg) {
				/* No match, unreference new group and return. */
				nhop_free_any(rnd.rnd_nhop);
				return (ESRCH);
			}
			error = change_route(rnh, rt, &rnd, rc);
		}
		return (error);
	}
#endif
	if (cb != NULL && !cb(rt, nh, cbdata))
		return (ESRCH);

	/* Refuse to remove a higher-priority (pinned) route. */
	if (prio < nhop_get_prio(nh))
		return (EADDRINUSE);

	return (delete_route(rnh, rt, rc));
}
1011
/*
 * Changes the route described by @info in the kernel table specified
 * by @fibnum and sa_family in @info->rti_info[RTAX_DST].
 *
 * Returns 0 on success and fills in operation metadata into @rc.
 */
int
rib_change_route(uint32_t fibnum, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	RIB_RLOCK_TRACKER;
	struct route_nhop_data rnd_orig;
	struct rib_head *rnh;
	struct rtentry *rt;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_CHANGE;

	/* Check if updated gateway exists */
	if ((info->rti_flags & RTF_GATEWAY) &&
	    (info->rti_info[RTAX_GATEWAY] == NULL)) {

		/*
		 * route(8) adds RTF_GATEWAY flag if -interface is not set.
		 * Remove RTF_GATEWAY to enforce consistency and maintain
		 * compatibility.
		 */
		info->rti_flags &= ~RTF_GATEWAY;
	}

	/*
	 * Route change is done in multiple steps, with dropping and
	 * reacquiring the lock. When multiple processes change the same
	 * route concurrently, the route may change between the steps.
	 * Address it by retrying the operation multiple times before
	 * failing.
	 */

	RIB_RLOCK(rnh);
	rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], &rnh->head);

	if (rt == NULL) {
		RIB_RUNLOCK(rnh);
		return (ESRCH);
	}

	/* Snapshot current nexthop/weight for the compare-and-swap below. */
	rnd_orig.rnd_nhop = rt->rt_nhop;
	rnd_orig.rnd_weight = rt->rt_weight;

	RIB_RUNLOCK(rnh);

	for (int i = 0; i < RIB_MAX_RETRIES; i++) {
		error = change_route_byinfo(rnh, rt, info, &rnd_orig, rc);
		if (error != EAGAIN)
			break;
	}

	return (error);
}
1073
/*
 * Creates a new nexthop derived from @nh_orig, updated with the data
 * from @info, re-resolving ifa/ifp when the gateway or interface
 * specification changed.
 *
 * Returns 0 on success and stores the new nexthop in *@nh_new.
 */
static int
change_nhop(struct rib_head *rnh, struct rt_addrinfo *info,
    struct nhop_object *nh_orig, struct nhop_object **nh_new)
{
	int error;

	/*
	 * New gateway could require new ifaddr, ifp;
	 * flags may also be different; ifp may be specified
	 * by ll sockaddr when protocol address is ambiguous
	 */
	if (((nh_orig->nh_flags & NHF_GATEWAY) &&
	    info->rti_info[RTAX_GATEWAY] != NULL) ||
	    info->rti_info[RTAX_IFP] != NULL ||
	    (info->rti_info[RTAX_IFA] != NULL &&
	     !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) {
		error = rt_getifa_fib(info, rnh->rib_fibnum);

		if (error != 0) {
			info->rti_ifa = NULL;
			return (error);
		}
	}

	error = nhop_create_from_nhop(rnh, nh_orig, info, nh_new);
	/* Clear the borrowed ifa reference regardless of the outcome. */
	info->rti_ifa = NULL;

	return (error);
}
1103
1104 #ifdef ROUTE_MPATH
1105 static int
1106 change_mpath_route(struct rib_head *rnh, struct rtentry *rt,
1107 struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
1108 struct rib_cmd_info *rc)
1109 {
1110 int error = 0, found_idx = 0;
1111 struct nhop_object *nh_orig = NULL, *nh_new;
1112 struct route_nhop_data rnd_new = {};
1113 const struct weightened_nhop *wn = NULL;
1114 struct weightened_nhop *wn_new;
1115 uint32_t num_nhops;
1116
1117 wn = nhgrp_get_nhops(rnd_orig->rnd_nhgrp, &num_nhops);
1118 for (int i = 0; i < num_nhops; i++) {
1119 if (check_info_match_nhop(info, NULL, wn[i].nh) == 0) {
1120 nh_orig = wn[i].nh;
1121 found_idx = i;
1122 break;
1123 }
1124 }
1125
1126 if (nh_orig == NULL)
1127 return (ESRCH);
1128
1129 error = change_nhop(rnh, info, nh_orig, &nh_new);
1130 if (error != 0)
1131 return (error);
1132
1133 wn_new = mallocarray(num_nhops, sizeof(struct weightened_nhop),
1134 M_TEMP, M_NOWAIT | M_ZERO);
1135 if (wn_new == NULL) {
1136 nhop_free(nh_new);
1137 return (EAGAIN);
1138 }
1139
1140 memcpy(wn_new, wn, num_nhops * sizeof(struct weightened_nhop));
1141 wn_new[found_idx].nh = nh_new;
1142 wn_new[found_idx].weight = get_info_weight(info, wn[found_idx].weight);
1143
1144 error = nhgrp_get_group(rnh, wn_new, num_nhops, 0, &rnd_new.rnd_nhgrp);
1145 nhop_free(nh_new);
1146 free(wn_new, M_TEMP);
1147
1148 if (error != 0)
1149 return (error);
1150
1151 error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
1152
1153 return (error);
1154 }
1155 #endif
1156
1157 static int
1158 change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
1159 struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
1160 struct rib_cmd_info *rc)
1161 {
1162 int error = 0;
1163 struct nhop_object *nh_orig;
1164 struct route_nhop_data rnd_new;
1165
1166 nh_orig = rnd_orig->rnd_nhop;
1167 if (nh_orig == NULL)
1168 return (ESRCH);
1169
1170 #ifdef ROUTE_MPATH
1171 if (NH_IS_NHGRP(nh_orig))
1172 return (change_mpath_route(rnh, rt, info, rnd_orig, rc));
1173 #endif
1174
1175 rnd_new.rnd_weight = get_info_weight(info, rnd_orig->rnd_weight);
1176 error = change_nhop(rnh, info, nh_orig, &rnd_new.rnd_nhop);
1177 if (error != 0)
1178 return (error);
1179 error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
1180
1181 return (error);
1182 }
1183
1184 /*
1185 * Insert @rt with nhop data from @rnd_new to @rnh.
1186 * Returns 0 on success and stores operation results in @rc.
1187 */
1188 static int
1189 add_route(struct rib_head *rnh, struct rtentry *rt,
1190 struct route_nhop_data *rnd, struct rib_cmd_info *rc)
1191 {
1192 struct radix_node *rn;
1193
1194 RIB_WLOCK_ASSERT(rnh);
1195
1196 rt->rt_nhop = rnd->rnd_nhop;
1197 rt->rt_weight = rnd->rnd_weight;
1198 rn = rnh->rnh_addaddr(rt_key(rt), rt_mask_const(rt), &rnh->head, rt->rt_nodes);
1199
1200 if (rn != NULL) {
1201 if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
1202 tmproutes_update(rnh, rt, rnd->rnd_nhop);
1203
1204 /* Finalize notification */
1205 rib_bump_gen(rnh);
1206 rnh->rnh_prefixes++;
1207
1208 rc->rc_cmd = RTM_ADD;
1209 rc->rc_rt = rt;
1210 rc->rc_nh_old = NULL;
1211 rc->rc_nh_new = rnd->rnd_nhop;
1212 rc->rc_nh_weight = rnd->rnd_weight;
1213
1214 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
1215 return (0);
1216 }
1217
1218 /* Existing route or memory allocation failure. */
1219 return (EEXIST);
1220 }
1221
1222 /*
1223 * Unconditionally deletes @rt from @rnh.
1224 */
1225 static int
1226 delete_route(struct rib_head *rnh, struct rtentry *rt, struct rib_cmd_info *rc)
1227 {
1228 RIB_WLOCK_ASSERT(rnh);
1229
1230 /* Route deletion requested. */
1231 struct radix_node *rn;
1232
1233 rn = rnh->rnh_deladdr(rt_key_const(rt), rt_mask_const(rt), &rnh->head);
1234 if (rn == NULL)
1235 return (ESRCH);
1236 rt = RNTORT(rn);
1237 rt->rte_flags &= ~RTF_UP;
1238
1239 rib_bump_gen(rnh);
1240 rnh->rnh_prefixes--;
1241
1242 rc->rc_cmd = RTM_DELETE;
1243 rc->rc_rt = rt;
1244 rc->rc_nh_old = rt->rt_nhop;
1245 rc->rc_nh_new = NULL;
1246 rc->rc_nh_weight = rt->rt_weight;
1247
1248 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
1249
1250 return (0);
1251 }
1252
1253 /*
1254 * Switch @rt nhop/weigh to the ones specified in @rnd.
1255 * Returns 0 on success.
1256 */
1257 int
1258 change_route(struct rib_head *rnh, struct rtentry *rt,
1259 struct route_nhop_data *rnd, struct rib_cmd_info *rc)
1260 {
1261 struct nhop_object *nh_orig;
1262
1263 RIB_WLOCK_ASSERT(rnh);
1264
1265 nh_orig = rt->rt_nhop;
1266
1267 if (rnd->rnd_nhop == NULL)
1268 return (delete_route(rnh, rt, rc));
1269
1270 /* Changing nexthop & weight to a new one */
1271 rt->rt_nhop = rnd->rnd_nhop;
1272 rt->rt_weight = rnd->rnd_weight;
1273 if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
1274 tmproutes_update(rnh, rt, rnd->rnd_nhop);
1275
1276 /* Finalize notification */
1277 rib_bump_gen(rnh);
1278 rc->rc_cmd = RTM_CHANGE;
1279 rc->rc_rt = rt;
1280 rc->rc_nh_old = nh_orig;
1281 rc->rc_nh_new = rnd->rnd_nhop;
1282 rc->rc_nh_weight = rnd->rnd_weight;
1283
1284 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
1285
1286 return (0);
1287 }
1288
1289 /*
1290 * Conditionally update route nhop/weight IFF data in @nhd_orig is
1291 * consistent with the current route data.
1292 * Nexthop in @nhd_new is consumed.
1293 */
int
change_route_conditional(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd_orig, struct route_nhop_data *rnd_new,
    struct rib_cmd_info *rc)
{
	struct rtentry *rt_new;
	int error = 0;

	IF_DEBUG_LEVEL(LOG_DEBUG2) {
		char buf_old[NHOP_PRINT_BUFSIZE], buf_new[NHOP_PRINT_BUFSIZE];
		nhop_print_buf_any(rnd_orig->rnd_nhop, buf_old, NHOP_PRINT_BUFSIZE);
		nhop_print_buf_any(rnd_new->rnd_nhop, buf_new, NHOP_PRINT_BUFSIZE);
		FIB_LOG(LOG_DEBUG2, rnh->rib_fibnum, rnh->rib_family,
		    "trying change %s -> %s", buf_old, buf_new);
	}
	RIB_WLOCK(rnh);

	/* Re-lookup the prefix under the write lock to get current state. */
	struct route_nhop_data rnd;
	rt_new = lookup_prefix_rt(rnh, rt, &rnd);

	if (rt_new == NULL) {
		/* Prefix is absent: valid only if the caller expected that. */
		if (rnd_orig->rnd_nhop == NULL)
			error = add_route(rnh, rt, rnd_new, rc);
		else {
			/*
			 * Prefix does not exist, which was not our assumption.
			 * Update @rnd_orig with the new data and return
			 */
			rnd_orig->rnd_nhop = NULL;
			rnd_orig->rnd_weight = 0;
			error = EAGAIN;
		}
	} else {
		/* Prefix exists, try to update */
		if (rnd_orig->rnd_nhop == rt_new->rt_nhop) {
			/*
			 * Nhop/mpath group hasn't changed. Flip
			 * to the new precalculated one and return
			 */
			error = change_route(rnh, rt_new, rnd_new, rc);
		} else {
			/* Update and retry */
			rnd_orig->rnd_nhop = rt_new->rt_nhop;
			rnd_orig->rnd_weight = rt_new->rt_weight;
			error = EAGAIN;
		}
	}

	RIB_WUNLOCK(rnh);

	if (error == 0) {
		/* Success: send delayed notifications and release the
		 * reference held via @rnd_orig. */
		rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

		if (rnd_orig->rnd_nhop != NULL)
			nhop_free_any(rnd_orig->rnd_nhop);

	} else {
		/* Failure: @rnd_new was not consumed; release its nexthop. */
		if (rnd_new->rnd_nhop != NULL)
			nhop_free_any(rnd_new->rnd_nhop);
	}

	return (error);
}
1357
1358 /*
1359 * Performs modification of routing table specificed by @action.
1360 * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST].
1361 * Needs to be run in network epoch.
1362 *
1363 * Returns 0 on success and fills in @rc with action result.
1364 */
1365 int
1366 rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info,
1367 struct rib_cmd_info *rc)
1368 {
1369 int error;
1370
1371 switch (action) {
1372 case RTM_ADD:
1373 error = rib_add_route(fibnum, info, rc);
1374 break;
1375 case RTM_DELETE:
1376 error = rib_del_route(fibnum, info, rc);
1377 break;
1378 case RTM_CHANGE:
1379 error = rib_change_route(fibnum, info, rc);
1380 break;
1381 default:
1382 error = ENOTSUP;
1383 }
1384
1385 return (error);
1386 }
1387
/* Walker state for conditional route deletion (rt_checkdelroute()). */
struct rt_delinfo
{
	struct rib_head *rnh;		/* table being walked */
	struct rtentry *head;		/* chain of unlinked rtentries to GC */
	rib_filter_f_t *filter_f;	/* selects entries to delete */
	void *filter_arg;		/* opaque argument for @filter_f */
	int prio;			/* priority passed to rt_delete_conditional() */
	struct rib_cmd_info rc;		/* result of the last unlink operation */
};
1397
1398 /*
1399 * Conditionally unlinks rtenties or paths from radix tree based
1400 * on the callback data passed in @arg.
1401 */
static int
rt_checkdelroute(struct radix_node *rn, void *arg)
{
	struct rt_delinfo *di = (struct rt_delinfo *)arg;
	struct rtentry *rt = (struct rtentry *)rn;

	/* Non-zero means the entry was not unlinked/changed; keep walking. */
	if (rt_delete_conditional(di->rnh, rt, di->prio,
	    di->filter_f, di->filter_arg, &di->rc) != 0)
		return (0);

	/*
	 * Add deleted rtentries to the list to GC them
	 * after dropping the lock.
	 *
	 * XXX: Delayed notifications not implemented
	 * for nexthop updates.
	 */
	if (di->rc.rc_cmd == RTM_DELETE) {
		/* Add to the list and return */
		rt->rt_chain = di->head;
		di->head = rt;
#ifdef ROUTE_MPATH
	} else {
		/*
		 * RTM_CHANGE to a different nexthop or nexthop group.
		 * Free old multipath group.
		 */
		nhop_free_any(di->rc.rc_nh_old);
#endif
	}

	return (0);
}
1435
1436 /*
1437 * Iterates over a routing table specified by @fibnum and @family and
1438 * deletes elements marked by @filter_f.
1439 * @fibnum: rtable id
1440 * @family: AF_ address family
1441 * @filter_f: function returning non-zero value for items to delete
1442 * @arg: data to pass to the @filter_f function
1443 * @report: true if rtsock notification is needed.
1444 */
void
rib_walk_del(u_int fibnum, int family, rib_filter_f_t *filter_f, void *filter_arg,
    bool report)
{
	struct rib_head *rnh;
	struct rtentry *rt;
	struct nhop_object *nh;
	struct epoch_tracker et;

	rnh = rt_tables_get_rnh(fibnum, family);
	if (rnh == NULL)
		return;

	/* di.head collects the unlinked entries for deferred processing. */
	struct rt_delinfo di = {
		.rnh = rnh,
		.filter_f = filter_f,
		.filter_arg = filter_arg,
		.prio = NH_PRIORITY_NORMAL,
	};

	NET_EPOCH_ENTER(et);

	/* Pass 1: unlink matching entries under the table write lock. */
	RIB_WLOCK(rnh);
	rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di);
	RIB_WUNLOCK(rnh);

	/* We might have something to reclaim. */
	bzero(&di.rc, sizeof(di.rc));
	di.rc.rc_cmd = RTM_DELETE;
	/* Pass 2: notify and free the collected entries, lock dropped. */
	while (di.head != NULL) {
		rt = di.head;
		di.head = rt->rt_chain;
		rt->rt_chain = NULL;
		nh = rt->rt_nhop;

		di.rc.rc_rt = rt;
		di.rc.rc_nh_old = nh;
		rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc);

		if (report) {
#ifdef ROUTE_MPATH
			struct nhgrp_object *nhg;
			const struct weightened_nhop *wn;
			uint32_t num_nhops;
			/* Announce each member of a multipath group. */
			if (NH_IS_NHGRP(nh)) {
				nhg = (struct nhgrp_object *)nh;
				wn = nhgrp_get_nhops(nhg, &num_nhops);
				for (int i = 0; i < num_nhops; i++)
					rt_routemsg(RTM_DELETE, rt, wn[i].nh, fibnum);
			} else
#endif
				rt_routemsg(RTM_DELETE, rt, nh, fibnum);
		}
		rt_free(rt);
	}

	NET_EPOCH_EXIT(et);
}
1503
1504 static int
1505 rt_delete_unconditional(struct radix_node *rn, void *arg)
1506 {
1507 struct rtentry *rt = RNTORT(rn);
1508 struct rib_head *rnh = (struct rib_head *)arg;
1509
1510 rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), &rnh->head);
1511 if (RNTORT(rn) == rt)
1512 rt_free(rt);
1513
1514 return (0);
1515 }
1516
1517 /*
1518 * Removes all routes from the routing table without executing notifications.
1519 * rtentres will be removed after the end of a current epoch.
1520 */
static void
rib_flush_routes(struct rib_head *rnh)
{
	/* Unconditionally unlink every entry under the write lock. */
	RIB_WLOCK(rnh);
	rnh->rnh_walktree(&rnh->head, rt_delete_unconditional, rnh);
	RIB_WUNLOCK(rnh);
}
1528
1529 void
1530 rib_flush_routes_family(int family)
1531 {
1532 struct rib_head *rnh;
1533
1534 for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) {
1535 if ((rnh = rt_tables_get_rnh(fibnum, family)) != NULL)
1536 rib_flush_routes(rnh);
1537 }
1538 }
1539
1540 const char *
1541 rib_print_family(int family)
1542 {
1543 switch (family) {
1544 case AF_INET:
1545 return ("inet");
1546 case AF_INET6:
1547 return ("inet6");
1548 case AF_LINK:
1549 return ("link");
1550 }
1551 return ("unknown");
1552 }
1553
Cache object: 115658c4a708ed69417cde604b0cf1db
|