/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_route.h"

#include <sys/types.h>
#include <sys/ck.h>
#include <sys/epoch.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/rmlock.h>
#include <sys/socket.h>

#include <net/if.h>
#include <net/route.h>
#include <net/route/nhop.h>
#include <net/route/nhop_utils.h>

#include <net/route/route_ctl.h>
#include <net/route/route_var.h>
#include <netinet6/scope6_var.h>
#include <netlink/netlink.h>
#include <netlink/netlink_ctl.h>
#include <netlink/netlink_route.h>
#include <netlink/route/route_var.h>

#define	DEBUG_MOD_NAME	nl_nhop
#define	DEBUG_MAX_LEVEL	LOG_DEBUG3
#include <netlink/netlink_debug.h>
_DECLARE_DEBUG(LOG_DEBUG3);

/*
 * This file contains the logic to maintain kernel nexthops and
 * nexthop groups based on the data provided by the user.
 *
 * The kernel stores (nearly) all of the routing data in the nexthops,
 * including the prefix-specific flags (NHF_HOST and NHF_DEFAULT).
 *
 * The Netlink API provides a higher-level abstraction for the user. Each
 * user-created nexthop may map to multiple kernel nexthops.
 *
 * The following variations require a separate kernel nexthop to be
 * created:
 *  * prefix flags (NHF_HOST, NHF_DEFAULT)
 *  * using an IPv6 gateway for IPv4 routes
 *  * different fibnum
 *
 * These kernel nexthops have their lifetime bound to the lifetime of
 * the user_nhop object. They are not collected until the user requests
 * deletion of the created user_nhop.
 *
 */
struct user_nhop {
	uint32_t		un_idx;		/* Userland-provided index */
	uint32_t		un_fibfam;	/* fibnum + af (as the highest byte) */
	uint8_t			un_protocol;	/* protocol that installed the record */
	struct nhop_object	*un_nhop;	/* "production" nexthop */
	struct nhop_object	*un_nhop_src;	/* nexthop to copy from */
	struct weightened_nhop	*un_nhgrp_src;	/* nexthops for nhg */
	uint32_t		un_nhgrp_count;	/* number of nexthops */
	struct user_nhop	*un_next;	/* next item in hash chain */
	struct user_nhop	*un_nextchild;	/* master -> children */
	struct epoch_context	un_epoch_ctx;	/* epoch ctl helper */
};

/* produce hash value for an object */
#define	unhop_hash_obj(_obj)	(hash_unhop(_obj))
/* compare two objects */
#define	unhop_cmp(_one, _two)	(cmp_unhop(_one, _two))
/* next object accessor */
#define	unhop_next(_obj)	(_obj)->un_next

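/*
 * Instantiate the generic chained hash table (CHT_SLIST_*, see
 * net/route/nhop_utils.h) for user_nhop objects, using the accessors
 * defined above.
 */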
CHT_SLIST_DEFINE(unhop, struct user_nhop);

struct unhop_ctl {
	struct unhop_head	un_head;
	struct rmlock		un_lock;
};
#define	UN_LOCK_INIT(_ctl)	rm_init(&(_ctl)->un_lock, "unhop_ctl")
#define	UN_TRACKER		struct rm_priotracker un_tracker
#define	UN_RLOCK(_ctl)		rm_rlock(&((_ctl)->un_lock), &un_tracker)
#define	UN_RUNLOCK(_ctl)	rm_runlock(&((_ctl)->un_lock), &un_tracker)

#define	UN_WLOCK(_ctl)		rm_wlock(&(_ctl)->un_lock);
#define	UN_WUNLOCK(_ctl)	rm_wunlock(&(_ctl)->un_lock);

VNET_DEFINE_STATIC(struct unhop_ctl *, un_ctl) = NULL;
#define	V_un_ctl	VNET(un_ctl)

static void consider_resize(struct unhop_ctl *ctl, uint32_t new_size);
static int cmp_unhop(const struct user_nhop *a, const struct user_nhop *b);
static unsigned int hash_unhop(const struct user_nhop *obj);

static void destroy_unhop(struct user_nhop *unhop);
static struct nhop_object *clone_unhop(const struct user_nhop *unhop,
    uint32_t fibnum, int family, int nh_flags);

static int
cmp_unhop(const struct user_nhop *a, const struct user_nhop *b)
{
	return (a->un_idx == b->un_idx && a->un_fibfam == b->un_fibfam);
}

/*
 * Hash callback: calculate hash of an object
 */
static unsigned int
hash_unhop(const struct user_nhop *obj)
{
	return (obj->un_idx ^ obj->un_fibfam);
}

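/*
 * A user_nhop created directly from the userland request is the "master"
 * entry and has un_fibfam set to 0.  Fib/family-specific kernel nexthops
 * cloned from it are linked to the master via the un_nextchild list.
 */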
#define	UNHOP_IS_MASTER(_unhop)	((_unhop)->un_fibfam == 0)

/*
 * Factory interface for creating matching kernel nexthops/nexthop groups.
 *
 * @fibnum: fibnum the nexthop will be used in
 * @family: upper-layer family the nexthop will be used in
 * @uidx: userland nexthop index used to create the nexthop
 * @nh_flags: desired nexthop prefix flags
 * @perror: pointer to store the error code in
 *
 * Returns a referenced nexthop linked to the @fibnum/@family rib on success.
 */
struct nhop_object *
nl_find_nhop(uint32_t fibnum, int family, uint32_t uidx,
    int nh_flags, int *perror)
{
	struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl);
	UN_TRACKER;

	if (__predict_false(ctl == NULL))
		return (NULL);

	struct user_nhop key = {
		.un_idx = uidx,
		.un_fibfam = fibnum | ((uint32_t)family) << 24,
	};
	struct user_nhop *unhop;

	nh_flags = nh_flags & (NHF_HOST | NHF_DEFAULT);

	if (__predict_false(family == 0))
		return (NULL);

	UN_RLOCK(ctl);
	CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop);
	if (unhop != NULL) {
		struct nhop_object *nh = unhop->un_nhop;
		UN_RUNLOCK(ctl);
		*perror = 0;
		nhop_ref_any(nh);
		return (nh);
	}

	/*
	 * Exact nexthop not found. Search for template nexthop to clone from.
	 */
	key.un_fibfam = 0;
	CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop);
	if (unhop == NULL) {
		UN_RUNLOCK(ctl);
		*perror = ESRCH;
		return (NULL);
	}

	UN_RUNLOCK(ctl);

	/* Create entry to insert first */
	struct user_nhop *un_new, *un_tmp;
	un_new = malloc(sizeof(struct user_nhop), M_NETLINK, M_NOWAIT | M_ZERO);
	if (un_new == NULL) {
		*perror = ENOMEM;
		return (NULL);
	}
	un_new->un_idx = uidx;
	un_new->un_fibfam = fibnum | ((uint32_t)family) << 24;

	/* Relying on epoch to protect unhop here */
	un_new->un_nhop = clone_unhop(unhop, fibnum, family, nh_flags);
	if (un_new->un_nhop == NULL) {
		free(un_new, M_NETLINK);
		*perror = ENOMEM;
		return (NULL);
	}

	/* Insert back and report */
	UN_WLOCK(ctl);

	/* First, find template record once again */
	CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop);
	if (unhop == NULL) {
		/* Someone deleted the nexthop during the call */
		UN_WUNLOCK(ctl);
		*perror = ESRCH;
		destroy_unhop(un_new);
		return (NULL);
	}

	/* Second, check the direct match */
	CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, un_new, un_tmp);
	struct nhop_object *nh;
	if (un_tmp != NULL) {
		/* Another thread already created the desired nexthop, use it */
		nh = un_tmp->un_nhop;
	} else {
		/* Finally, insert the new nexthop and link it to the primary */
		nh = un_new->un_nhop;
		CHT_SLIST_INSERT_HEAD(&ctl->un_head, unhop, un_new);
		un_new->un_nextchild = unhop->un_nextchild;
		unhop->un_nextchild = un_new;
		un_new = NULL;
		NL_LOG(LOG_DEBUG2, "linked cloned nexthop %p", nh);
	}

	UN_WUNLOCK(ctl);

	if (un_new != NULL)
		destroy_unhop(un_new);

	*perror = 0;
	nhop_ref_any(nh);
	return (nh);
}

static struct user_nhop *
nl_find_base_unhop(struct unhop_ctl *ctl, uint32_t uidx)
{
	struct user_nhop key = { .un_idx = uidx };
	struct user_nhop *unhop = NULL;
	UN_TRACKER;

	UN_RLOCK(ctl);
	CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop);
	UN_RUNLOCK(ctl);

	return (unhop);
}

#define	MAX_STACK_NHOPS	4
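/*
 * Clones the template nexthop referenced by @unhop into a kernel nexthop
 * bound to @fibnum/@family with the requested prefix flags.  For nexthop
 * groups, each member is resolved via nl_find_nhop() and a matching kernel
 * nexthop group is requested instead.
 */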
static struct nhop_object *
clone_unhop(const struct user_nhop *unhop, uint32_t fibnum, int family, int nh_flags)
{
#ifdef ROUTE_MPATH
	const struct weightened_nhop *wn;
	struct weightened_nhop *wn_new, wn_base[MAX_STACK_NHOPS];
	uint32_t num_nhops;
#endif
	struct nhop_object *nh = NULL;
	int error = 0;

	if (unhop->un_nhop_src != NULL) {
		IF_DEBUG_LEVEL(LOG_DEBUG2) {
			char nhbuf[NHOP_PRINT_BUFSIZE];
			nhop_print_buf_any(unhop->un_nhop_src, nhbuf, sizeof(nhbuf));
			FIB_NH_LOG(LOG_DEBUG2, unhop->un_nhop_src,
			    "cloning nhop %s -> %u.%u flags 0x%X", nhbuf, fibnum,
			    family, nh_flags);
		}
		nh = nhop_alloc(fibnum, AF_UNSPEC);
		if (nh == NULL)
			return (NULL);
		nhop_copy(nh, unhop->un_nhop_src);
		/* Check that nexthop gateway is compatible with the new family */
		if (!nhop_set_upper_family(nh, family)) {
			nhop_free(nh);
			return (NULL);
		}
		nhop_set_uidx(nh, unhop->un_idx);
		nhop_set_pxtype_flag(nh, nh_flags);
		return (nhop_get_nhop(nh, &error));
	}
#ifdef ROUTE_MPATH
	wn = unhop->un_nhgrp_src;
	num_nhops = unhop->un_nhgrp_count;

	if (num_nhops > MAX_STACK_NHOPS) {
		wn_new = malloc(num_nhops * sizeof(struct weightened_nhop), M_TEMP, M_NOWAIT);
		if (wn_new == NULL)
			return (NULL);
	} else
		wn_new = wn_base;

	for (int i = 0; i < num_nhops; i++) {
		uint32_t uidx = nhop_get_uidx(wn[i].nh);
		MPASS(uidx != 0);
		wn_new[i].nh = nl_find_nhop(fibnum, family, uidx, nh_flags, &error);
		if (error != 0)
			break;
		wn_new[i].weight = wn[i].weight;
	}

	if (error == 0) {
		struct rib_head *rh = nhop_get_rh(wn_new[0].nh);
		struct nhgrp_object *nhg;

		error = nhgrp_get_group(rh, wn_new, num_nhops, unhop->un_idx, &nhg);
		nh = (struct nhop_object *)nhg;
	}

	if (wn_new != wn_base)
		free(wn_new, M_TEMP);
#endif
	return (nh);
}

static void
destroy_unhop(struct user_nhop *unhop)
{
	if (unhop->un_nhop != NULL)
		nhop_free_any(unhop->un_nhop);
	if (unhop->un_nhop_src != NULL)
		nhop_free_any(unhop->un_nhop_src);
	if (unhop->un_nhgrp_src != NULL)
		free(unhop->un_nhgrp_src, M_NETLINK);
	free(unhop, M_NETLINK);
}

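/*
 * Epoch callback: performs the deferred destruction of a user_nhop after
 * all concurrent readers have left the network epoch.
 */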
static void
destroy_unhop_epoch(epoch_context_t ctx)
{
	struct user_nhop *unhop;

	unhop = __containerof(ctx, struct user_nhop, un_epoch_ctx);

	destroy_unhop(unhop);
}

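/*
 * Picks a random, currently unused userland nexthop index from the
 * [4 * 65536, 5 * 65536) range for RTM_NEWNEXTHOP requests that did not
 * specify NHA_ID.  Returns 0 if no spare index was found.
 */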
static uint32_t
find_spare_uidx(struct unhop_ctl *ctl)
{
	struct user_nhop *unhop, key = {};
	uint32_t uidx = 0;
	UN_TRACKER;

	UN_RLOCK(ctl);
	/* This should find a spare uidx in ~99/100 cases even with 75% of the 64K range in use */
	for (int i = 0; i < 16; i++) {
		key.un_idx = (arc4random() % 65536) + 65536 * 4;
		CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop);
		if (unhop == NULL) {
			uidx = key.un_idx;
			break;
		}
	}
	UN_RUNLOCK(ctl);

	return (uidx);
}

/*
 * Actual netlink code
 */
struct netlink_walkargs {
	struct nl_writer *nw;
	struct nlmsghdr hdr;
	struct nlpcb *so;
	int family;
	int error;
	int count;
	int dumped;
};
#define	ENOMEM_IF_NULL(_v)	if ((_v) == NULL) goto enomem

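/*
 * Writes an RTM_NEWNEXTHOP message describing a nexthop group: NHA_ID,
 * NHA_GROUP_TYPE and an NHA_GROUP attribute carrying an array of
 * struct nexthop_grp entries (one per member).
 */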
static bool
dump_nhgrp(const struct user_nhop *unhop, struct nlmsghdr *hdr,
    struct nl_writer *nw)
{

	if (!nlmsg_reply(nw, hdr, sizeof(struct nhmsg)))
		goto enomem;

	struct nhmsg *nhm = nlmsg_reserve_object(nw, struct nhmsg);
	ENOMEM_IF_NULL(nhm);
	nhm->nh_family = AF_UNSPEC;
	nhm->nh_scope = 0;
	nhm->nh_protocol = unhop->un_protocol;
	nhm->nh_flags = 0;

	nlattr_add_u32(nw, NHA_ID, unhop->un_idx);
	nlattr_add_u16(nw, NHA_GROUP_TYPE, NEXTHOP_GRP_TYPE_MPATH);

	struct weightened_nhop *wn = unhop->un_nhgrp_src;
	uint32_t num_nhops = unhop->un_nhgrp_count;
	/* TODO: a better API? */
	int nla_len = sizeof(struct nlattr);
	nla_len += NETLINK_ALIGN(num_nhops * sizeof(struct nexthop_grp));
	struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr);
	if (nla == NULL)
		goto enomem;
	nla->nla_type = NHA_GROUP;
	nla->nla_len = nla_len;
	for (int i = 0; i < num_nhops; i++) {
		struct nexthop_grp *grp = &((struct nexthop_grp *)(nla + 1))[i];
		grp->id = nhop_get_uidx(wn[i].nh);
		grp->weight = wn[i].weight;
		grp->resvd1 = 0;
		grp->resvd2 = 0;
	}

	if (nlmsg_end(nw))
		return (true);
enomem:
	NL_LOG(LOG_DEBUG, "error: unable to allocate attribute memory");
	nlmsg_abort(nw);
	return (false);
}

static bool
dump_nhop(const struct user_nhop *unhop, struct nlmsghdr *hdr,
    struct nl_writer *nw)
{
	struct nhop_object *nh = unhop->un_nhop_src;

	if (!nlmsg_reply(nw, hdr, sizeof(struct nhmsg)))
		goto enomem;

	struct nhmsg *nhm = nlmsg_reserve_object(nw, struct nhmsg);
	ENOMEM_IF_NULL(nhm);
	nhm->nh_family = nhop_get_neigh_family(nh);
	nhm->nh_scope = 0; // XXX: what's that?
	nhm->nh_protocol = unhop->un_protocol;
	nhm->nh_flags = 0;

	nlattr_add_u32(nw, NHA_ID, unhop->un_idx);
	if (nh->nh_flags & NHF_BLACKHOLE) {
		nlattr_add_flag(nw, NHA_BLACKHOLE);
		goto done;
	}
	nlattr_add_u32(nw, NHA_OIF, nh->nh_ifp->if_index);

	switch (nh->gw_sa.sa_family) {
#ifdef INET
	case AF_INET:
		nlattr_add(nw, NHA_GATEWAY, 4, &nh->gw4_sa.sin_addr);
		break;
#endif
#ifdef INET6
	case AF_INET6:
		{
			struct in6_addr addr = nh->gw6_sa.sin6_addr;
			in6_clearscope(&addr);
			nlattr_add(nw, NHA_GATEWAY, 16, &addr);
			break;
		}
#endif
	}

done:
	if (nlmsg_end(nw))
		return (true);
enomem:
	nlmsg_abort(nw);
	return (false);
}

static void
dump_unhop(const struct user_nhop *unhop, struct nlmsghdr *hdr,
    struct nl_writer *nw)
{
	if (unhop->un_nhop_src != NULL)
		dump_nhop(unhop, hdr, nw);
	else
		dump_nhgrp(unhop, hdr, nw);
}

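/*
 * Removes the user_nhop with index @uidx together with all of its
 * fib/family-specific children from the hash, notifies RTNLGRP_NEXTHOP
 * listeners about the deletion and schedules the actual destruction
 * via the network epoch.
 */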
static int
delete_unhop(struct unhop_ctl *ctl, struct nlmsghdr *hdr, uint32_t uidx)
{
	struct user_nhop *unhop_ret, *unhop_base, *unhop_chain;

	struct user_nhop key = { .un_idx = uidx };

	UN_WLOCK(ctl);

	CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop_base);

	if (unhop_base != NULL) {
		CHT_SLIST_REMOVE(&ctl->un_head, unhop, unhop_base, unhop_ret);
		IF_DEBUG_LEVEL(LOG_DEBUG2) {
			char nhbuf[NHOP_PRINT_BUFSIZE];
			nhop_print_buf_any(unhop_base->un_nhop, nhbuf, sizeof(nhbuf));
			FIB_NH_LOG(LOG_DEBUG3, unhop_base->un_nhop,
			    "removed base nhop %u: %s", uidx, nhbuf);
		}
		/* Unlink all child nexthops as well, keeping the chain intact */
		unhop_chain = unhop_base->un_nextchild;
		while (unhop_chain != NULL) {
			CHT_SLIST_REMOVE(&ctl->un_head, unhop, unhop_chain,
			    unhop_ret);
			MPASS(unhop_chain == unhop_ret);
			IF_DEBUG_LEVEL(LOG_DEBUG3) {
				char nhbuf[NHOP_PRINT_BUFSIZE];
				nhop_print_buf_any(unhop_chain->un_nhop,
				    nhbuf, sizeof(nhbuf));
				FIB_NH_LOG(LOG_DEBUG3, unhop_chain->un_nhop,
				    "removed child nhop %u: %s", uidx, nhbuf);
			}
			unhop_chain = unhop_chain->un_nextchild;
		}
	}

	UN_WUNLOCK(ctl);

	if (unhop_base == NULL) {
		NL_LOG(LOG_DEBUG, "unable to find unhop %u", uidx);
		return (ENOENT);
	}

	/* Report nexthop deletion */
	struct netlink_walkargs wa = {
		.hdr.nlmsg_pid = hdr->nlmsg_pid,
		.hdr.nlmsg_seq = hdr->nlmsg_seq,
		.hdr.nlmsg_flags = hdr->nlmsg_flags,
		.hdr.nlmsg_type = NL_RTM_DELNEXTHOP,
	};

	struct nl_writer nw = {};
	if (!nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_ROUTE, RTNLGRP_NEXTHOP)) {
		NL_LOG(LOG_DEBUG, "error allocating message writer");
		return (ENOMEM);
	}

	dump_unhop(unhop_base, &wa.hdr, &nw);
	nlmsg_flush(&nw);

	while (unhop_base != NULL) {
		unhop_chain = unhop_base->un_nextchild;
		NET_EPOCH_CALL(destroy_unhop_epoch, &unhop_base->un_epoch_ctx);
		unhop_base = unhop_chain;
	}

	return (0);
}

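/*
 * Grows the unhop hash to @new_size buckets.  The new bucket array is
 * allocated without the lock held and swapped in under the write lock;
 * CHT_SLIST_RESIZE() leaves the old array in new_ptr, which is then freed.
 */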
static void
consider_resize(struct unhop_ctl *ctl, uint32_t new_size)
{
	void *new_ptr;
	size_t alloc_size;

	if (new_size == 0)
		return;

	alloc_size = CHT_SLIST_GET_RESIZE_SIZE(new_size);
	new_ptr = malloc(alloc_size, M_NETLINK, M_NOWAIT | M_ZERO);
	if (new_ptr == NULL)
		return;

	NL_LOG(LOG_DEBUG, "resizing hash: %u -> %u", ctl->un_head.hash_size, new_size);
	UN_WLOCK(ctl);
	CHT_SLIST_RESIZE(&ctl->un_head, unhop, new_ptr, new_size);
	UN_WUNLOCK(ctl);

	free(new_ptr, M_NETLINK);
}

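/*
 * Lazily allocates the per-VNET unhop hash on the first RTM_NEWNEXTHOP
 * request.  A compare-and-swap is used so that concurrent first requests
 * initialize the state exactly once; the loser frees its copy.
 */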
static bool __noinline
vnet_init_unhops(void)
{
	uint32_t num_buckets = 16;
	size_t alloc_size = CHT_SLIST_GET_RESIZE_SIZE(num_buckets);

	struct unhop_ctl *ctl = malloc(sizeof(struct unhop_ctl), M_NETLINK,
	    M_NOWAIT | M_ZERO);
	if (ctl == NULL)
		return (false);

	void *ptr = malloc(alloc_size, M_NETLINK, M_NOWAIT | M_ZERO);
	if (ptr == NULL) {
		free(ctl, M_NETLINK);
		return (false);
	}
	CHT_SLIST_INIT(&ctl->un_head, ptr, num_buckets);
	UN_LOCK_INIT(ctl);

	if (!atomic_cmpset_ptr((uintptr_t *)&V_un_ctl, (uintptr_t)NULL, (uintptr_t)ctl)) {
		free(ptr, M_NETLINK);
		free(ctl, M_NETLINK);
	}

	if (atomic_load_ptr(&V_un_ctl) == NULL)
		return (false);

	NL_LOG(LOG_NOTICE, "UNHOPS init done");

	return (true);
}

static void
vnet_destroy_unhops(const void *unused __unused)
{
	struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl);
	struct user_nhop *unhop, *tmp;

	if (ctl == NULL)
		return;
	V_un_ctl = NULL;

	/* Wait till all unhop users finish their reads */
	NET_EPOCH_WAIT();

	UN_WLOCK(ctl);
	CHT_SLIST_FOREACH_SAFE(&ctl->un_head, unhop, unhop, tmp) {
		destroy_unhop(unhop);
	} CHT_SLIST_FOREACH_SAFE_END;
	UN_WUNLOCK(ctl);

	free(ctl->un_head.ptr, M_NETLINK);
	free(ctl, M_NETLINK);
}
VNET_SYSUNINIT(vnet_destroy_unhops, SI_SUB_PROTO_IF, SI_ORDER_ANY,
    vnet_destroy_unhops, NULL);

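/*
 * NHA_GROUP attribute parser callback: validates that the payload is a
 * non-empty array of struct nexthop_grp entries and stores the raw
 * attribute pointer; the members are resolved later in newnhg().
 */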
static int
nlattr_get_nhg(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target)
{
	int error = 0;

	/* Verify attribute correctness */
	struct nexthop_grp *grp = NLA_DATA(nla);
	int data_len = NLA_DATA_LEN(nla);

	int count = data_len / sizeof(*grp);
	if (count == 0 || (count * sizeof(*grp) != data_len)) {
		NL_LOG(LOG_DEBUG, "Invalid length for NHA_GROUP: %d", data_len);
		return (EINVAL);
	}

	*((struct nlattr **)target) = nla;
	return (error);
}

struct nl_parsed_nhop {
	uint32_t	nha_id;
	uint8_t		nha_blackhole;
	uint8_t		nha_groups;
	struct ifnet	*nha_oif;
	struct sockaddr	*nha_gw;
	struct nlattr	*nha_group;
	uint8_t		nh_family;
	uint8_t		nh_protocol;
};

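/*
 * Parser descriptors: _IN() refers to fields of the struct nhmsg header,
 * _OUT() to fields of struct nl_parsed_nhop.  The tables below drive the
 * generic netlink message parser declared via NL_DECLARE_PARSER().
 */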
#define	_IN(_field)	offsetof(struct nhmsg, _field)
#define	_OUT(_field)	offsetof(struct nl_parsed_nhop, _field)
static const struct nlfield_parser nlf_p_nh[] = {
	{ .off_in = _IN(nh_family), .off_out = _OUT(nh_family), .cb = nlf_get_u8 },
	{ .off_in = _IN(nh_protocol), .off_out = _OUT(nh_protocol), .cb = nlf_get_u8 },
};

static const struct nlattr_parser nla_p_nh[] = {
	{ .type = NHA_ID, .off = _OUT(nha_id), .cb = nlattr_get_uint32 },
	{ .type = NHA_GROUP, .off = _OUT(nha_group), .cb = nlattr_get_nhg },
	{ .type = NHA_BLACKHOLE, .off = _OUT(nha_blackhole), .cb = nlattr_get_flag },
	{ .type = NHA_OIF, .off = _OUT(nha_oif), .cb = nlattr_get_ifp },
	{ .type = NHA_GATEWAY, .off = _OUT(nha_gw), .cb = nlattr_get_ip },
	{ .type = NHA_GROUPS, .off = _OUT(nha_groups), .cb = nlattr_get_flag },
};
#undef _IN
#undef _OUT
NL_DECLARE_PARSER(nhmsg_parser, struct nhmsg, nlf_p_nh, nla_p_nh);

static bool
eligible_nhg(const struct nhop_object *nh)
{
	return (nh->nh_flags & NHF_GATEWAY);
}

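/*
 * Builds the weightened_nhop array for an RTM_NEWNEXTHOP group request.
 * Each member NHA_ID is resolved to its template kernel nexthop; nested
 * groups and nexthops without a gateway are rejected.
 */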
static int
newnhg(struct unhop_ctl *ctl, struct nl_parsed_nhop *attrs, struct user_nhop *unhop)
{
	struct nexthop_grp *grp = NLA_DATA(attrs->nha_group);
	int count = NLA_DATA_LEN(attrs->nha_group) / sizeof(*grp);
	struct weightened_nhop *wn;

	wn = malloc(sizeof(*wn) * count, M_NETLINK, M_NOWAIT | M_ZERO);
	if (wn == NULL)
		return (ENOMEM);

	for (int i = 0; i < count; i++) {
		struct user_nhop *unhop_base;

		unhop_base = nl_find_base_unhop(ctl, grp[i].id);
		if (unhop_base == NULL) {
			NL_LOG(LOG_DEBUG, "unable to find uidx %u", grp[i].id);
			free(wn, M_NETLINK);
			return (ESRCH);
		} else if (unhop_base->un_nhop_src == NULL) {
			NL_LOG(LOG_DEBUG, "uidx %u is a group, nested group unsupported",
			    grp[i].id);
			free(wn, M_NETLINK);
			return (ENOTSUP);
		} else if (!eligible_nhg(unhop_base->un_nhop_src)) {
			NL_LOG(LOG_DEBUG, "uidx %u nhop is not mpath-eligible",
			    grp[i].id);
			free(wn, M_NETLINK);
			return (ENOTSUP);
		}
		/*
		 * TODO: consider more rigid eligibility checks:
		 * restrict nexthops with the same gateway
		 */
		wn[i].nh = unhop_base->un_nhop_src;
		wn[i].weight = grp[i].weight;
	}
	unhop->un_nhgrp_src = wn;
	unhop->un_nhgrp_count = count;
	return (0);
}

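/*
 * Creates the template kernel nexthop from the parsed attributes: either
 * a blackhole nexthop or a gateway nexthop bound to the NHA_OIF interface,
 * with the source address derived from that interface.
 */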
static int
newnhop(struct nl_parsed_nhop *attrs, struct user_nhop *unhop)
{
	struct ifaddr *ifa = NULL;
	struct nhop_object *nh;
	int error;

	if (!attrs->nha_blackhole) {
		if (attrs->nha_gw == NULL) {
			NL_LOG(LOG_DEBUG, "missing NHA_GATEWAY");
			return (EINVAL);
		}
		if (attrs->nha_oif == NULL) {
			NL_LOG(LOG_DEBUG, "missing NHA_OIF");
			return (EINVAL);
		}
		if (ifa == NULL)
			ifa = ifaof_ifpforaddr(attrs->nha_gw, attrs->nha_oif);
		if (ifa == NULL) {
			NL_LOG(LOG_DEBUG, "Unable to determine default source IP");
			return (EINVAL);
		}
	}

	int family = attrs->nha_gw != NULL ? attrs->nha_gw->sa_family : attrs->nh_family;

	nh = nhop_alloc(RT_DEFAULT_FIB, family);
	if (nh == NULL) {
		NL_LOG(LOG_DEBUG, "Unable to allocate nexthop");
		return (ENOMEM);
	}
	nhop_set_uidx(nh, attrs->nha_id);

	if (attrs->nha_blackhole)
		nhop_set_blackhole(nh, NHF_BLACKHOLE);
	else {
		nhop_set_gw(nh, attrs->nha_gw, true);
		nhop_set_transmit_ifp(nh, attrs->nha_oif);
		nhop_set_src(nh, ifa);
	}

	error = nhop_get_unlinked(nh);
	if (error != 0) {
		NL_LOG(LOG_DEBUG, "unable to finalize nexthop");
		return (error);
	}

	IF_DEBUG_LEVEL(LOG_DEBUG2) {
		char nhbuf[NHOP_PRINT_BUFSIZE];
		nhop_print_buf(nh, nhbuf, sizeof(nhbuf));
		NL_LOG(LOG_DEBUG2, "Adding unhop %u: %s", attrs->nha_id, nhbuf);
	}

	unhop->un_nhop_src = nh;
	return (0);
}

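/*
 * RTM_NEWNEXTHOP handler: parses the request, auto-assigns NHA_ID if it
 * was not supplied, creates the user_nhop (plain nexthop or group),
 * inserts it into the hash and broadcasts the resulting RTM_NEWNEXTHOP
 * notification to RTNLGRP_NEXTHOP listeners.
 */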
static int
rtnl_handle_newnhop(struct nlmsghdr *hdr, struct nlpcb *nlp,
    struct nl_pstate *npt)
{
	struct user_nhop *unhop;
	int error;

	if ((__predict_false(V_un_ctl == NULL)) && (!vnet_init_unhops()))
		return (ENOMEM);
	struct unhop_ctl *ctl = V_un_ctl;

	struct nl_parsed_nhop attrs = {};
	error = nl_parse_nlmsg(hdr, &nhmsg_parser, npt, &attrs);
	if (error != 0)
		return (error);

	/*
	 * Get valid nha_id. Treat nha_id == 0 (auto-assignment) as a second-class
	 * citizen.
	 */
	if (attrs.nha_id == 0) {
		attrs.nha_id = find_spare_uidx(ctl);
		if (attrs.nha_id == 0) {
			NL_LOG(LOG_DEBUG, "Unable to get spare uidx");
			return (ENOSPC);
		}
	}

	NL_LOG(LOG_DEBUG, "IFINDEX %d", attrs.nha_oif ? attrs.nha_oif->if_index : 0);

	unhop = malloc(sizeof(struct user_nhop), M_NETLINK, M_NOWAIT | M_ZERO);
	if (unhop == NULL) {
		NL_LOG(LOG_DEBUG, "Unable to allocate user_nhop");
		return (ENOMEM);
	}
	unhop->un_idx = attrs.nha_id;
	unhop->un_protocol = attrs.nh_protocol;

	if (attrs.nha_group)
		error = newnhg(ctl, &attrs, unhop);
	else
		error = newnhop(&attrs, unhop);

	if (error != 0) {
		free(unhop, M_NETLINK);
		return (error);
	}

	UN_WLOCK(ctl);
	/* Check if uidx already exists */
	struct user_nhop *tmp = NULL;
	CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, unhop, tmp);
	if (tmp != NULL) {
		UN_WUNLOCK(ctl);
		NL_LOG(LOG_DEBUG, "nhop idx %u already exists", attrs.nha_id);
		destroy_unhop(unhop);
		return (EEXIST);
	}
	CHT_SLIST_INSERT_HEAD(&ctl->un_head, unhop, unhop);
	uint32_t num_buckets_new = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->un_head);
	UN_WUNLOCK(ctl);

	/* Report the addition of the new nexthop */
	struct netlink_walkargs wa = {
		.hdr.nlmsg_pid = hdr->nlmsg_pid,
		.hdr.nlmsg_seq = hdr->nlmsg_seq,
		.hdr.nlmsg_flags = hdr->nlmsg_flags,
		.hdr.nlmsg_type = NL_RTM_NEWNEXTHOP,
	};

	struct nl_writer nw = {};
	if (!nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_ROUTE, RTNLGRP_NEXTHOP)) {
		NL_LOG(LOG_DEBUG, "error allocating message writer");
		return (ENOMEM);
	}

	dump_unhop(unhop, &wa.hdr, &nw);
	nlmsg_flush(&nw);

	consider_resize(ctl, num_buckets_new);

	return (0);
}

static int
rtnl_handle_delnhop(struct nlmsghdr *hdr, struct nlpcb *nlp,
    struct nl_pstate *npt)
{
	struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl);
	int error;

	if (__predict_false(ctl == NULL))
		return (ESRCH);

	struct nl_parsed_nhop attrs = {};
	error = nl_parse_nlmsg(hdr, &nhmsg_parser, npt, &attrs);
	if (error != 0)
		return (error);

	if (attrs.nha_id == 0) {
		NL_LOG(LOG_DEBUG, "NHA_ID not set");
		return (EINVAL);
	}

	error = delete_unhop(ctl, hdr, attrs.nha_id);

	return (error);
}

static bool
match_unhop(const struct nl_parsed_nhop *attrs, struct user_nhop *unhop)
{
	if (attrs->nha_id != 0 && unhop->un_idx != attrs->nha_id)
		return (false);
	if (attrs->nha_groups != 0 && unhop->un_nhgrp_src == NULL)
		return (false);
	if (attrs->nha_oif != NULL &&
	    (unhop->un_nhop_src == NULL || unhop->un_nhop_src->nh_ifp != attrs->nha_oif))
		return (false);

	return (true);
}

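/*
 * RTM_GETNEXTHOP handler: dumps either a single user nexthop (when NHA_ID
 * is set) or all master nexthops matching the optional NHA_OIF/NHA_GROUPS
 * filters as a multipart reply.
 */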
static int
rtnl_handle_getnhop(struct nlmsghdr *hdr, struct nlpcb *nlp,
    struct nl_pstate *npt)
{
	struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl);
	struct user_nhop *unhop;
	UN_TRACKER;
	int error;

	if (__predict_false(ctl == NULL))
		return (ESRCH);

	struct nl_parsed_nhop attrs = {};
	error = nl_parse_nlmsg(hdr, &nhmsg_parser, npt, &attrs);
	if (error != 0)
		return (error);

	struct netlink_walkargs wa = {
		.nw = npt->nw,
		.hdr.nlmsg_pid = hdr->nlmsg_pid,
		.hdr.nlmsg_seq = hdr->nlmsg_seq,
		.hdr.nlmsg_flags = hdr->nlmsg_flags,
		.hdr.nlmsg_type = NL_RTM_NEWNEXTHOP,
	};

	if (attrs.nha_id != 0) {
		NL_LOG(LOG_DEBUG2, "searching for uidx %u", attrs.nha_id);
		struct user_nhop key = { .un_idx = attrs.nha_id };
		UN_RLOCK(ctl);
		CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop);
		UN_RUNLOCK(ctl);

		if (unhop == NULL)
			return (ESRCH);
		dump_unhop(unhop, &wa.hdr, wa.nw);
		return (0);
	}

	UN_RLOCK(ctl);
	wa.hdr.nlmsg_flags |= NLM_F_MULTI;
	CHT_SLIST_FOREACH(&ctl->un_head, unhop, unhop) {
		if (UNHOP_IS_MASTER(unhop) && match_unhop(&attrs, unhop))
			dump_unhop(unhop, &wa.hdr, wa.nw);
	} CHT_SLIST_FOREACH_END;
	UN_RUNLOCK(ctl);

	if (wa.error == 0) {
		if (!nlmsg_end_dump(wa.nw, wa.error, &wa.hdr))
			return (ENOMEM);
	}
	return (0);
}

static const struct rtnl_cmd_handler cmd_handlers[] = {
	{
		.cmd = NL_RTM_NEWNEXTHOP,
		.name = "RTM_NEWNEXTHOP",
		.cb = &rtnl_handle_newnhop,
		.priv = PRIV_NET_ROUTE,
	},
	{
		.cmd = NL_RTM_DELNEXTHOP,
		.name = "RTM_DELNEXTHOP",
		.cb = &rtnl_handle_delnhop,
		.priv = PRIV_NET_ROUTE,
	},
	{
		.cmd = NL_RTM_GETNEXTHOP,
		.name = "RTM_GETNEXTHOP",
		.cb = &rtnl_handle_getnhop,
	}
};

static const struct nlhdr_parser *all_parsers[] = { &nhmsg_parser };

void
rtnl_nexthops_init(void)
{
	NL_VERIFY_PARSERS(all_parsers);
	rtnl_register_messages(cmd_handlers, NL_ARRAY_LEN(cmd_handlers));
}