1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2020 Alexander V. Chernikov
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
27 * $FreeBSD$
28 */
29 #include "opt_inet.h"
30 #include "opt_route.h"
31
32 #include <sys/cdefs.h>
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/lock.h>
36 #include <sys/rmlock.h>
37 #include <sys/malloc.h>
38 #include <sys/mbuf.h>
39 #include <sys/refcount.h>
40 #include <sys/socket.h>
41 #include <sys/sysctl.h>
42 #include <sys/kernel.h>
43 #include <sys/epoch.h>
44
45 #include <net/if.h>
46 #include <net/if_var.h>
47 #include <net/if_private.h>
48 #include <net/route.h>
49 #include <net/route/route_ctl.h>
50 #include <net/route/route_var.h>
51 #include <net/vnet.h>
52
53 #include <netinet/in.h>
54 #include <netinet/in_var.h>
55 #include <netinet/in_fib.h>
56
57 #include <net/route/nhop_utils.h>
58 #include <net/route/nhop.h>
59 #include <net/route/nhop_var.h>
60 #include <net/route/nhgrp_var.h>
61
62 #define DEBUG_MOD_NAME nhgrp_ctl
63 #define DEBUG_MAX_LEVEL LOG_DEBUG
64 #include <net/route/route_debug.h>
65 _DECLARE_DEBUG(LOG_INFO);
66
67 /*
68 * This file contains the supporting functions for creating multipath groups
69 * and compiling their dataplane parts.
70 */
71
72 /* MPF_MULTIPATH must be the same as NHF_MULTIPATH for nhop selection to work */
73 _Static_assert(MPF_MULTIPATH == NHF_MULTIPATH,
74 "MPF_MULTIPATH must be the same as NHF_MULTIPATH");
75 /* Offset and size of flags field has to be the same for nhop/nhop groups */
76 CHK_STRUCT_FIELD_GENERIC(struct nhop_object, nh_flags, struct nhgrp_object, nhg_flags);
77 /* Cap multipath to 64, as the larger values would break rib_cmd_info bmasks */
78 CTASSERT(RIB_MAX_MPATH_WIDTH <= 64);
79
80 static int wn_cmp_idx(const void *a, const void *b);
81 static void sort_weightened_nhops(struct weightened_nhop *wn, int num_nhops);
82
83 static struct nhgrp_priv *get_nhgrp(struct nh_control *ctl,
84 struct weightened_nhop *wn, int num_nhops, uint32_t uidx, int *perror);
85 static void destroy_nhgrp(struct nhgrp_priv *nhg_priv);
86 static void destroy_nhgrp_epoch(epoch_context_t ctx);
87 static void free_nhgrp_nhops(struct nhgrp_priv *nhg_priv);
88
89 static int
90 wn_cmp_idx(const void *a, const void *b)
91 {
92 const struct weightened_nhop *w_a = a;
93 const struct weightened_nhop *w_b = b;
94 uint32_t a_idx = w_a->nh->nh_priv->nh_idx;
95 uint32_t b_idx = w_b->nh->nh_priv->nh_idx;
96
97 if (a_idx < b_idx)
98 return (-1);
99 else if (a_idx > b_idx)
100 return (1);
101 else
102 return (0);
103 }
104
105 /*
106 * Perform in-place sorting for array of nexthops in @wn.
107 * Sort by nexthop index ascending.
108 */
109 static void
110 sort_weightened_nhops(struct weightened_nhop *wn, int num_nhops)
111 {
112
113 qsort(wn, num_nhops, sizeof(struct weightened_nhop), wn_cmp_idx);
114 }
115
116 /*
117 * In order to determine the minimum weight difference in the array
118 * of weights, create a sorted array of weights, using spare "storage"
119 * field in the `struct weightened_nhop`.
120 * Assume weights to be (mostly) the same and use insertion sort to
121 * make it sorted.
122 */
123 static void
124 sort_weightened_nhops_weights(struct weightened_nhop *wn, int num_items)
125 {
126 wn[0].storage = wn[0].weight;
127 for (int i = 1, j = 0; i < num_items; i++) {
128 uint32_t weight = wn[i].weight; // read from 'weight' as it's not reordered
129 /* Move all weights > weight 1 position right */
130 for (j = i - 1; j >= 0 && wn[j].storage > weight; j--)
131 wn[j + 1].storage = wn[j].storage;
132 wn[j + 1].storage = weight;
133 }
134 }
135
136 /*
137 * Calculate minimum number of slots required to fit the existing
138 * set of weights in the common use case where weights are "easily"
139 * comparable.
140 * Assumes @wn is sorted by weight ascending and each weight is > 0.
141 * Returns number of slots or 0 if precise calculation failed.
142 *
143 * Some examples:
144 * note: (i, X) pair means (nhop=i, weight=X):
145 * (1, 1) (2, 2) -> 3 slots [1, 2, 2]
146 * (1, 100), (2, 200) -> 3 slots [1, 2, 2]
147 * (1, 100), (2, 200), (3, 400) -> 7 slots [1, 2, 2, 3, 3, 3]
148 */
149 static uint32_t
150 calc_min_mpath_slots_fast(struct weightened_nhop *wn, size_t num_items,
151 uint64_t *ptotal)
152 {
153 uint32_t i, last, xmin;
154 uint64_t total = 0;
155
156 // Get sorted array of weights in .storage field
157 sort_weightened_nhops_weights(wn, num_items);
158
159 last = 0;
160 xmin = wn[0].storage;
161 for (i = 0; i < num_items; i++) {
162 total += wn[i].storage;
163 if ((wn[i].storage != last) &&
164 ((wn[i].storage - last < xmin) || xmin == 0)) {
165 xmin = wn[i].storage - last;
166 }
167 last = wn[i].storage;
168 }
169 *ptotal = total;
170 /* xmin is the minimum unit of desired capacity */
171 if ((total % xmin) != 0)
172 return (0);
173 for (i = 0; i < num_items; i++) {
174 if ((wn[i].weight % xmin) != 0)
175 return (0);
176 }
177
178 return ((uint32_t)(total / xmin));
179 }
180
181 /*
182 * Calculate minimum number of slots required to fit the existing
183 * set of weights while maintaining weight coefficients.
184 *
185 * Assume @wn is sorted by weight ascending and each weight is > 0.
186 *
187 * Tries to find simple precise solution first and falls back to
188 * RIB_MAX_MPATH_WIDTH in case of any failure.
189 */
190 static uint32_t
191 calc_min_mpath_slots(struct weightened_nhop *wn, size_t num_items)
192 {
193 uint32_t v;
194 uint64_t total;
195
196 v = calc_min_mpath_slots_fast(wn, num_items, &total);
197 if (total == 0)
198 return (0);
199 if ((v == 0) || (v > RIB_MAX_MPATH_WIDTH))
200 v = RIB_MAX_MPATH_WIDTH;
201
202 return (v);
203 }
204
205 /*
206 * Nexthop group data consists of
207 * 1) dataplane part, with nhgrp_object as a header followed by an
208 * arbitrary number of nexthop pointers.
209 * 2) control plane part, with nhgrp_priv as a header, followed by
210 * an arbirtrary number of 'struct weightened_nhop' object.
211 *
212 * Given nexthop groups are (mostly) immutable, allocate all data
213 * in one go.
214 *
215 */
216 __noinline static size_t
217 get_nhgrp_alloc_size(uint32_t nhg_size, uint32_t num_nhops)
218 {
219 size_t sz;
220
221 sz = sizeof(struct nhgrp_object);
222 sz += nhg_size * sizeof(struct nhop_object *);
223 sz += sizeof(struct nhgrp_priv);
224 sz += num_nhops * sizeof(struct weightened_nhop);
225 return (sz);
226 }
227
228 /*
229 * Compile actual list of nexthops to be used by datapath from
230 * the nexthop group @dst.
231 *
232 * For example, compiling control plane list of 2 nexthops
233 * [(200, A), (100, B)] would result in the datapath array
234 * [A, A, B]
235 */
236 static void
237 compile_nhgrp(struct nhgrp_priv *dst_priv, const struct weightened_nhop *x,
238 uint32_t num_slots)
239 {
240 struct nhgrp_object *dst;
241 int i, slot_idx, remaining_slots;
242 uint64_t remaining_sum, nh_weight, nh_slots;
243
244 slot_idx = 0;
245 dst = dst_priv->nhg;
246 /* Calculate sum of all weights */
247 remaining_sum = 0;
248 for (i = 0; i < dst_priv->nhg_nh_count; i++)
249 remaining_sum += x[i].weight;
250 remaining_slots = num_slots;
251 FIB_NH_LOG(LOG_DEBUG3, x[0].nh, "sum: %lu, slots: %d",
252 remaining_sum, remaining_slots);
253 for (i = 0; i < dst_priv->nhg_nh_count; i++) {
254 /* Calculate number of slots for the current nexthop */
255 if (remaining_sum > 0) {
256 nh_weight = (uint64_t)x[i].weight;
257 nh_slots = (nh_weight * remaining_slots / remaining_sum);
258 } else
259 nh_slots = 0;
260
261 remaining_sum -= x[i].weight;
262 remaining_slots -= nh_slots;
263
264 FIB_NH_LOG(LOG_DEBUG3, x[0].nh,
265 " rem_sum: %lu, rem_slots: %d nh_slots: %d, slot_idx: %d",
266 remaining_sum, remaining_slots, (int)nh_slots, slot_idx);
267
268 KASSERT((slot_idx + nh_slots <= num_slots),
269 ("index overflow during nhg compilation"));
270 while (nh_slots-- > 0)
271 dst->nhops[slot_idx++] = x[i].nh;
272 }
273 }
274
275 /*
276 * Allocates new nexthop group for the list of weightened nexthops.
277 * Assume sorted list.
278 * Does NOT reference any nexthops in the group.
279 * Returns group with refcount=1 or NULL.
280 */
281 static struct nhgrp_priv *
282 alloc_nhgrp(struct weightened_nhop *wn, int num_nhops)
283 {
284 uint32_t nhgrp_size;
285 struct nhgrp_object *nhg;
286 struct nhgrp_priv *nhg_priv;
287
288 nhgrp_size = calc_min_mpath_slots(wn, num_nhops);
289 if (nhgrp_size == 0) {
290 /* Zero weights, abort */
291 return (NULL);
292 }
293
294 size_t sz = get_nhgrp_alloc_size(nhgrp_size, num_nhops);
295 nhg = malloc(sz, M_NHOP, M_NOWAIT | M_ZERO);
296 if (nhg == NULL) {
297 FIB_NH_LOG(LOG_INFO, wn[0].nh,
298 "unable to allocate group with num_nhops %d (compiled %u)",
299 num_nhops, nhgrp_size);
300 return (NULL);
301 }
302
303 /* Has to be the first to make NHGRP_PRIV() work */
304 nhg->nhg_size = nhgrp_size;
305 nhg->nhg_flags = MPF_MULTIPATH;
306
307 nhg_priv = NHGRP_PRIV(nhg);
308 nhg_priv->nhg_nh_count = num_nhops;
309 refcount_init(&nhg_priv->nhg_refcount, 1);
310
311 /* Please see nhgrp_free() comments on the initial value */
312 refcount_init(&nhg_priv->nhg_linked, 2);
313
314 nhg_priv->nhg = nhg;
315 memcpy(&nhg_priv->nhg_nh_weights[0], wn,
316 num_nhops * sizeof(struct weightened_nhop));
317
318 FIB_NH_LOG(LOG_DEBUG, wn[0].nh, "num_nhops: %d, compiled_nhop: %u",
319 num_nhops, nhgrp_size);
320
321 compile_nhgrp(nhg_priv, wn, nhg->nhg_size);
322
323 return (nhg_priv);
324 }
325
326 void
327 nhgrp_ref_object(struct nhgrp_object *nhg)
328 {
329 struct nhgrp_priv *nhg_priv;
330 u_int old __diagused;
331
332 nhg_priv = NHGRP_PRIV(nhg);
333 old = refcount_acquire(&nhg_priv->nhg_refcount);
334 KASSERT(old > 0, ("%s: nhgrp object %p has 0 refs", __func__, nhg));
335 }
336
337 void
338 nhgrp_free(struct nhgrp_object *nhg)
339 {
340 struct nhgrp_priv *nhg_priv;
341 struct nh_control *ctl;
342 struct epoch_tracker et;
343
344 nhg_priv = NHGRP_PRIV(nhg);
345
346 if (!refcount_release(&nhg_priv->nhg_refcount))
347 return;
348
349 /*
350 * group objects don't have an explicit lock attached to it.
351 * As groups are reclaimed based on reference count, it is possible
352 * that some groups will persist after vnet destruction callback
353 * called. Given that, handle scenario with nhgrp_free_group() being
354 * called either after or simultaneously with nhgrp_ctl_unlink_all()
355 * by using another reference counter: nhg_linked.
356 *
357 * There are only 2 places, where nhg_linked can be decreased:
358 * rib destroy (nhgrp_ctl_unlink_all) and this function.
359 * nhg_link can never be increased.
360 *
361 * Hence, use initial value of 2 to make use of
362 * refcount_release_if_not_last().
363 *
364 * There can be two scenarious when calling this function:
365 *
366 * 1) nhg_linked value is 2. This means that either
367 * nhgrp_ctl_unlink_all() has not been called OR it is running,
368 * but we are guaranteed that nh_control won't be freed in
369 * this epoch. Hence, nexthop can be safely unlinked.
370 *
371 * 2) nh_linked value is 1. In that case, nhgrp_ctl_unlink_all()
372 * has been called and nhgrp unlink can be skipped.
373 */
374
375 NET_EPOCH_ENTER(et);
376 if (refcount_release_if_not_last(&nhg_priv->nhg_linked)) {
377 ctl = nhg_priv->nh_control;
378 if (unlink_nhgrp(ctl, nhg_priv) == NULL) {
379 /* Do not try to reclaim */
380 RT_LOG(LOG_INFO, "Failed to unlink nexhop group %p",
381 nhg_priv);
382 NET_EPOCH_EXIT(et);
383 return;
384 }
385 }
386 NET_EPOCH_EXIT(et);
387
388 KASSERT((nhg_priv->nhg_idx == 0), ("gr_idx != 0"));
389 NET_EPOCH_CALL(destroy_nhgrp_epoch, &nhg_priv->nhg_epoch_ctx);
390 }
391
392 /*
393 * Destroys all local resources belonging to @nhg_priv.
394 */
395 __noinline static void
396 destroy_nhgrp_int(struct nhgrp_priv *nhg_priv)
397 {
398
399 free(nhg_priv->nhg, M_NHOP);
400 }
401
402 __noinline static void
403 destroy_nhgrp(struct nhgrp_priv *nhg_priv)
404 {
405
406 KASSERT((nhg_priv->nhg_refcount == 0), ("nhg_refcount != 0"));
407 KASSERT((nhg_priv->nhg_idx == 0), ("gr_idx != 0"));
408
409 IF_DEBUG_LEVEL(LOG_DEBUG2) {
410 char nhgbuf[NHOP_PRINT_BUFSIZE] __unused;
411 FIB_NH_LOG(LOG_DEBUG2, nhg_priv->nhg_nh_weights[0].nh,
412 "destroying %s", nhgrp_print_buf(nhg_priv->nhg,
413 nhgbuf, sizeof(nhgbuf)));
414 }
415
416 free_nhgrp_nhops(nhg_priv);
417 destroy_nhgrp_int(nhg_priv);
418 }
419
420 /*
421 * Epoch callback indicating group is safe to destroy
422 */
423 static void
424 destroy_nhgrp_epoch(epoch_context_t ctx)
425 {
426 struct nhgrp_priv *nhg_priv;
427
428 nhg_priv = __containerof(ctx, struct nhgrp_priv, nhg_epoch_ctx);
429
430 destroy_nhgrp(nhg_priv);
431 }
432
433 static bool
434 ref_nhgrp_nhops(struct nhgrp_priv *nhg_priv)
435 {
436
437 for (int i = 0; i < nhg_priv->nhg_nh_count; i++) {
438 if (nhop_try_ref_object(nhg_priv->nhg_nh_weights[i].nh) != 0)
439 continue;
440
441 /*
442 * Failed to ref the nexthop, b/c it's deleted.
443 * Need to rollback references back.
444 */
445 for (int j = 0; j < i; j++)
446 nhop_free(nhg_priv->nhg_nh_weights[j].nh);
447 return (false);
448 }
449
450 return (true);
451 }
452
453 static void
454 free_nhgrp_nhops(struct nhgrp_priv *nhg_priv)
455 {
456
457 for (int i = 0; i < nhg_priv->nhg_nh_count; i++)
458 nhop_free(nhg_priv->nhg_nh_weights[i].nh);
459 }
460
461 /*
462 * Allocate nexthop group of size @num_nhops with nexthops specified by
463 * @wn. Nexthops have to be unique and match the fibnum/family of the group.
464 * Returns unlinked nhgrp object on success or NULL and non-zero perror.
465 */
466 struct nhgrp_object *
467 nhgrp_alloc(uint32_t fibnum, int family, struct weightened_nhop *wn, int num_nhops,
468 int *perror)
469 {
470 struct rib_head *rh = rt_tables_get_rnh(fibnum, family);
471 struct nhgrp_priv *nhg_priv;
472 struct nh_control *ctl;
473
474 if (rh == NULL) {
475 *perror = E2BIG;
476 return (NULL);
477 }
478
479 ctl = rh->nh_control;
480
481 if (num_nhops > RIB_MAX_MPATH_WIDTH) {
482 *perror = E2BIG;
483 return (NULL);
484 }
485
486 if (ctl->gr_head.hash_size == 0) {
487 /* First multipath request. Bootstrap mpath datastructures. */
488 if (nhgrp_ctl_alloc_default(ctl, M_NOWAIT) == 0) {
489 *perror = ENOMEM;
490 return (NULL);
491 }
492 }
493
494 /* Sort nexthops & check there are no duplicates */
495 sort_weightened_nhops(wn, num_nhops);
496 uint32_t last_id = 0;
497 for (int i = 0; i < num_nhops; i++) {
498 if (wn[i].nh->nh_priv->nh_control != ctl) {
499 *perror = EINVAL;
500 return (NULL);
501 }
502 if (wn[i].nh->nh_priv->nh_idx == last_id) {
503 *perror = EEXIST;
504 return (NULL);
505 }
506 last_id = wn[i].nh->nh_priv->nh_idx;
507 }
508
509 if ((nhg_priv = alloc_nhgrp(wn, num_nhops)) == NULL) {
510 *perror = ENOMEM;
511 return (NULL);
512 }
513 nhg_priv->nh_control = ctl;
514
515 *perror = 0;
516 return (nhg_priv->nhg);
517 }
518
519 /*
520 * Finds an existing group matching @nhg or links @nhg to the tree.
521 * Returns the referenced group or NULL and non-zero @perror.
522 */
523 struct nhgrp_object *
524 nhgrp_get_nhgrp(struct nhgrp_object *nhg, int *perror)
525 {
526 struct nhgrp_priv *nhg_priv, *key = NHGRP_PRIV(nhg);
527 struct nh_control *ctl = key->nh_control;
528
529 nhg_priv = find_nhgrp(ctl, key);
530 if (nhg_priv != NULL) {
531 /*
532 * Free originally-created group. As it hasn't been linked
533 * and the dependent nexhops haven't been referenced, just free
534 * the group.
535 */
536 destroy_nhgrp_int(key);
537 *perror = 0;
538 return (nhg_priv->nhg);
539 } else {
540 /* No existing group, try to link the new one */
541 if (!ref_nhgrp_nhops(key)) {
542 /*
543 * Some of the nexthops have been scheduled for deletion.
544 * As the group hasn't been linked / no nexhops have been
545 * referenced, call the final destructor immediately.
546 */
547 destroy_nhgrp_int(key);
548 *perror = EAGAIN;
549 return (NULL);
550 }
551 if (link_nhgrp(ctl, key) == 0) {
552 /* Unable to allocate index? */
553 *perror = EAGAIN;
554 free_nhgrp_nhops(key);
555 destroy_nhgrp_int(key);
556 return (NULL);
557 }
558 *perror = 0;
559 return (nhg);
560 }
561
562 /* NOTREACHED */
563 }
564
565 /*
566 * Creates or looks up an existing nexthop group based on @wn and @num_nhops.
567 *
568 * Returns referenced nhop group or NULL, passing error code in @perror.
569 */
570 struct nhgrp_priv *
571 get_nhgrp(struct nh_control *ctl, struct weightened_nhop *wn, int num_nhops,
572 uint32_t uidx, int *perror)
573 {
574 struct nhgrp_object *nhg;
575
576 nhg = nhgrp_alloc(ctl->ctl_rh->rib_fibnum, ctl->ctl_rh->rib_family,
577 wn, num_nhops, perror);
578 if (nhg == NULL)
579 return (NULL);
580 nhgrp_set_uidx(nhg, uidx);
581 nhg = nhgrp_get_nhgrp(nhg, perror);
582 if (nhg != NULL)
583 return (NHGRP_PRIV(nhg));
584 return (NULL);
585 }
586
587
588 /*
589 * Appends one or more nexthops denoted by @wm to the nexthop group @gr_orig.
590 *
591 * Returns referenced nexthop group or NULL. In the latter case, @perror is
592 * filled with an error code.
593 * Note that function does NOT care if the next nexthops already exists
594 * in the @gr_orig. As a result, they will be added, resulting in the
595 * same nexthop being present multiple times in the new group.
596 */
597 static struct nhgrp_priv *
598 append_nhops(struct nh_control *ctl, const struct nhgrp_object *gr_orig,
599 struct weightened_nhop *wn, int num_nhops, int *perror)
600 {
601 char storage[64];
602 struct weightened_nhop *pnhops;
603 struct nhgrp_priv *nhg_priv;
604 const struct nhgrp_priv *src_priv;
605 size_t sz;
606 int curr_nhops;
607
608 src_priv = NHGRP_PRIV_CONST(gr_orig);
609 curr_nhops = src_priv->nhg_nh_count;
610
611 *perror = 0;
612
613 sz = (src_priv->nhg_nh_count + num_nhops) * (sizeof(struct weightened_nhop));
614 /* optimize for <= 4 paths, each path=16 bytes */
615 if (sz <= sizeof(storage))
616 pnhops = (struct weightened_nhop *)&storage[0];
617 else {
618 pnhops = malloc(sz, M_TEMP, M_NOWAIT);
619 if (pnhops == NULL) {
620 *perror = ENOMEM;
621 return (NULL);
622 }
623 }
624
625 /* Copy nhops from original group first */
626 memcpy(pnhops, src_priv->nhg_nh_weights,
627 curr_nhops * sizeof(struct weightened_nhop));
628 memcpy(&pnhops[curr_nhops], wn, num_nhops * sizeof(struct weightened_nhop));
629 curr_nhops += num_nhops;
630
631 nhg_priv = get_nhgrp(ctl, pnhops, curr_nhops, 0, perror);
632
633 if (pnhops != (struct weightened_nhop *)&storage[0])
634 free(pnhops, M_TEMP);
635
636 if (nhg_priv == NULL)
637 return (NULL);
638
639 return (nhg_priv);
640 }
641
642
643 /*
644 * Creates/finds nexthop group based on @wn and @num_nhops.
645 * Returns 0 on success with referenced group in @rnd, or
646 * errno.
647 *
648 * If the error is EAGAIN, then the operation can be retried.
649 */
650 int
651 nhgrp_get_group(struct rib_head *rh, struct weightened_nhop *wn, int num_nhops,
652 uint32_t uidx, struct nhgrp_object **pnhg)
653 {
654 struct nh_control *ctl = rh->nh_control;
655 struct nhgrp_priv *nhg_priv;
656 int error;
657
658 nhg_priv = get_nhgrp(ctl, wn, num_nhops, uidx, &error);
659 if (nhg_priv != NULL)
660 *pnhg = nhg_priv->nhg;
661
662 return (error);
663 }
664
665 /*
666 * Creates new nexthop group based on @src group without the nexthops
667 * chosen by @flt_func.
668 * Returns 0 on success, storring the reference nhop group/object in @rnd.
669 */
670 int
671 nhgrp_get_filtered_group(struct rib_head *rh, const struct rtentry *rt,
672 const struct nhgrp_object *src, rib_filter_f_t flt_func, void *flt_data,
673 struct route_nhop_data *rnd)
674 {
675 char storage[64];
676 struct nh_control *ctl = rh->nh_control;
677 struct weightened_nhop *pnhops;
678 const struct nhgrp_priv *mp_priv, *src_priv;
679 size_t sz;
680 int error, i, num_nhops;
681
682 src_priv = NHGRP_PRIV_CONST(src);
683
684 sz = src_priv->nhg_nh_count * (sizeof(struct weightened_nhop));
685 /* optimize for <= 4 paths, each path=16 bytes */
686 if (sz <= sizeof(storage))
687 pnhops = (struct weightened_nhop *)&storage[0];
688 else {
689 if ((pnhops = malloc(sz, M_TEMP, M_NOWAIT)) == NULL)
690 return (ENOMEM);
691 }
692
693 /* Filter nexthops */
694 error = 0;
695 num_nhops = 0;
696 for (i = 0; i < src_priv->nhg_nh_count; i++) {
697 if (flt_func(rt, src_priv->nhg_nh_weights[i].nh, flt_data))
698 continue;
699 memcpy(&pnhops[num_nhops++], &src_priv->nhg_nh_weights[i],
700 sizeof(struct weightened_nhop));
701 }
702
703 if (num_nhops == 0) {
704 rnd->rnd_nhgrp = NULL;
705 rnd->rnd_weight = 0;
706 } else if (num_nhops == 1) {
707 rnd->rnd_nhop = pnhops[0].nh;
708 rnd->rnd_weight = pnhops[0].weight;
709 if (nhop_try_ref_object(rnd->rnd_nhop) == 0)
710 error = EAGAIN;
711 } else {
712 mp_priv = get_nhgrp(ctl, pnhops, num_nhops, 0, &error);
713 if (mp_priv != NULL)
714 rnd->rnd_nhgrp = mp_priv->nhg;
715 rnd->rnd_weight = 0;
716 }
717
718 if (pnhops != (struct weightened_nhop *)&storage[0])
719 free(pnhops, M_TEMP);
720
721 return (error);
722 }
723
724 /*
725 * Creates new multipath group based on existing group/nhop in @rnd_orig and
726 * to-be-added nhop @wn_add.
727 * Returns 0 on success and stores result in @rnd_new.
728 */
729 int
730 nhgrp_get_addition_group(struct rib_head *rh, struct route_nhop_data *rnd_orig,
731 struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_new)
732 {
733 struct nh_control *ctl = rh->nh_control;
734 struct nhgrp_priv *nhg_priv;
735 struct weightened_nhop wn[2] = {};
736 int error;
737
738 if (rnd_orig->rnd_nhop == NULL) {
739 /* No paths to add to, just reference current nhop */
740 *rnd_new = *rnd_add;
741 if (nhop_try_ref_object(rnd_new->rnd_nhop) == 0)
742 return (EAGAIN);
743 return (0);
744 }
745
746 wn[0].nh = rnd_add->rnd_nhop;
747 wn[0].weight = rnd_add->rnd_weight;
748
749 if (!NH_IS_NHGRP(rnd_orig->rnd_nhop)) {
750 /* Simple merge of 2 non-multipath nexthops */
751 wn[1].nh = rnd_orig->rnd_nhop;
752 wn[1].weight = rnd_orig->rnd_weight;
753 nhg_priv = get_nhgrp(ctl, wn, 2, 0, &error);
754 } else {
755 /* Get new nhop group with @rt->rt_nhop as an additional nhop */
756 nhg_priv = append_nhops(ctl, rnd_orig->rnd_nhgrp, &wn[0], 1,
757 &error);
758 }
759
760 if (nhg_priv == NULL)
761 return (error);
762 rnd_new->rnd_nhgrp = nhg_priv->nhg;
763 rnd_new->rnd_weight = 0;
764
765 return (0);
766 }
767
768 /*
769 * Returns pointer to array of nexthops with weights for
770 * given @nhg. Stores number of items in the array into @pnum_nhops.
771 */
772 const struct weightened_nhop *
773 nhgrp_get_nhops(const struct nhgrp_object *nhg, uint32_t *pnum_nhops)
774 {
775 const struct nhgrp_priv *nhg_priv;
776
777 KASSERT(((nhg->nhg_flags & MPF_MULTIPATH) != 0), ("nhop is not mpath"));
778
779 nhg_priv = NHGRP_PRIV_CONST(nhg);
780 *pnum_nhops = nhg_priv->nhg_nh_count;
781
782 return (nhg_priv->nhg_nh_weights);
783 }
784
785 void
786 nhgrp_set_uidx(struct nhgrp_object *nhg, uint32_t uidx)
787 {
788 struct nhgrp_priv *nhg_priv;
789
790 KASSERT(((nhg->nhg_flags & MPF_MULTIPATH) != 0), ("nhop is not mpath"));
791
792 nhg_priv = NHGRP_PRIV(nhg);
793
794 nhg_priv->nhg_uidx = uidx;
795 }
796
797 uint32_t
798 nhgrp_get_uidx(const struct nhgrp_object *nhg)
799 {
800 const struct nhgrp_priv *nhg_priv;
801
802 KASSERT(((nhg->nhg_flags & MPF_MULTIPATH) != 0), ("nhop is not mpath"));
803
804 nhg_priv = NHGRP_PRIV_CONST(nhg);
805 return (nhg_priv->nhg_uidx);
806 }
807
808 /*
809 * Prints nexhop group @nhg data in the provided @buf.
810 * Example: nhg#33/sz=3:[#1:100,#2:100,#3:100]
811 * Example: nhg#33/sz=5:[#1:100,#2:100,..]
812 */
813 char *
814 nhgrp_print_buf(const struct nhgrp_object *nhg, char *buf, size_t bufsize)
815 {
816 const struct nhgrp_priv *nhg_priv = NHGRP_PRIV_CONST(nhg);
817
818 int off = snprintf(buf, bufsize, "nhg#%u/sz=%u:[", nhg_priv->nhg_idx,
819 nhg_priv->nhg_nh_count);
820
821 for (int i = 0; i < nhg_priv->nhg_nh_count; i++) {
822 const struct weightened_nhop *wn = &nhg_priv->nhg_nh_weights[i];
823 int len = snprintf(&buf[off], bufsize - off, "#%u:%u,",
824 wn->nh->nh_priv->nh_idx, wn->weight);
825 if (len + off + 3 >= bufsize) {
826 int len = snprintf(&buf[off], bufsize - off, "...");
827 off += len;
828 break;
829 }
830 off += len;
831 }
832 if (off > 0)
833 off--; // remove last ","
834 if (off + 1 < bufsize)
835 snprintf(&buf[off], bufsize - off, "]");
836 return buf;
837 }
838
839 __noinline static int
840 dump_nhgrp_entry(struct rib_head *rh, const struct nhgrp_priv *nhg_priv,
841 char *buffer, size_t buffer_size, struct sysctl_req *w)
842 {
843 struct rt_msghdr *rtm;
844 struct nhgrp_external *nhge;
845 struct nhgrp_container *nhgc;
846 const struct nhgrp_object *nhg;
847 struct nhgrp_nhop_external *ext;
848 int error;
849 size_t sz;
850
851 nhg = nhg_priv->nhg;
852
853 sz = sizeof(struct rt_msghdr) + sizeof(struct nhgrp_external);
854 /* controlplane nexthops */
855 sz += sizeof(struct nhgrp_container);
856 sz += sizeof(struct nhgrp_nhop_external) * nhg_priv->nhg_nh_count;
857 /* dataplane nexthops */
858 sz += sizeof(struct nhgrp_container);
859 sz += sizeof(struct nhgrp_nhop_external) * nhg->nhg_size;
860
861 KASSERT(sz <= buffer_size, ("increase nhgrp buffer size"));
862
863 bzero(buffer, sz);
864
865 rtm = (struct rt_msghdr *)buffer;
866 rtm->rtm_msglen = sz;
867 rtm->rtm_version = RTM_VERSION;
868 rtm->rtm_type = RTM_GET;
869
870 nhge = (struct nhgrp_external *)(rtm + 1);
871
872 nhge->nhg_idx = nhg_priv->nhg_idx;
873 nhge->nhg_refcount = nhg_priv->nhg_refcount;
874
875 /* fill in control plane nexthops firs */
876 nhgc = (struct nhgrp_container *)(nhge + 1);
877 nhgc->nhgc_type = NHG_C_TYPE_CNHOPS;
878 nhgc->nhgc_subtype = 0;
879 nhgc->nhgc_len = sizeof(struct nhgrp_container);
880 nhgc->nhgc_len += sizeof(struct nhgrp_nhop_external) * nhg_priv->nhg_nh_count;
881 nhgc->nhgc_count = nhg_priv->nhg_nh_count;
882
883 ext = (struct nhgrp_nhop_external *)(nhgc + 1);
884 for (int i = 0; i < nhg_priv->nhg_nh_count; i++) {
885 ext[i].nh_idx = nhg_priv->nhg_nh_weights[i].nh->nh_priv->nh_idx;
886 ext[i].nh_weight = nhg_priv->nhg_nh_weights[i].weight;
887 }
888
889 /* fill in dataplane nexthops */
890 nhgc = (struct nhgrp_container *)(&ext[nhg_priv->nhg_nh_count]);
891 nhgc->nhgc_type = NHG_C_TYPE_DNHOPS;
892 nhgc->nhgc_subtype = 0;
893 nhgc->nhgc_len = sizeof(struct nhgrp_container);
894 nhgc->nhgc_len += sizeof(struct nhgrp_nhop_external) * nhg->nhg_size;
895 nhgc->nhgc_count = nhg->nhg_size;
896
897 ext = (struct nhgrp_nhop_external *)(nhgc + 1);
898 for (int i = 0; i < nhg->nhg_size; i++) {
899 ext[i].nh_idx = nhg->nhops[i]->nh_priv->nh_idx;
900 ext[i].nh_weight = 0;
901 }
902
903 error = SYSCTL_OUT(w, buffer, sz);
904
905 return (error);
906 }
907
908 uint32_t
909 nhgrp_get_idx(const struct nhgrp_object *nhg)
910 {
911 const struct nhgrp_priv *nhg_priv;
912
913 nhg_priv = NHGRP_PRIV_CONST(nhg);
914 return (nhg_priv->nhg_idx);
915 }
916
917 uint8_t
918 nhgrp_get_origin(const struct nhgrp_object *nhg)
919 {
920 return (NHGRP_PRIV_CONST(nhg)->nhg_origin);
921 }
922
923 void
924 nhgrp_set_origin(struct nhgrp_object *nhg, uint8_t origin)
925 {
926 NHGRP_PRIV(nhg)->nhg_origin = origin;
927 }
928
929 uint32_t
930 nhgrp_get_count(struct rib_head *rh)
931 {
932 struct nh_control *ctl;
933 uint32_t count;
934
935 ctl = rh->nh_control;
936
937 NHOPS_RLOCK(ctl);
938 count = ctl->gr_head.items_count;
939 NHOPS_RUNLOCK(ctl);
940
941 return (count);
942 }
943
944 int
945 nhgrp_dump_sysctl(struct rib_head *rh, struct sysctl_req *w)
946 {
947 struct nh_control *ctl = rh->nh_control;
948 struct epoch_tracker et;
949 struct nhgrp_priv *nhg_priv;
950 char *buffer;
951 size_t sz;
952 int error = 0;
953
954 if (ctl->gr_head.items_count == 0)
955 return (0);
956
957 /* Calculate the maximum nhop group size in bytes */
958 sz = sizeof(struct rt_msghdr) + sizeof(struct nhgrp_external);
959 sz += 2 * sizeof(struct nhgrp_container);
960 sz += 2 * sizeof(struct nhgrp_nhop_external) * RIB_MAX_MPATH_WIDTH;
961 buffer = malloc(sz, M_TEMP, M_NOWAIT);
962 if (buffer == NULL)
963 return (ENOMEM);
964
965 NET_EPOCH_ENTER(et);
966 NHOPS_RLOCK(ctl);
967 CHT_SLIST_FOREACH(&ctl->gr_head, mpath, nhg_priv) {
968 error = dump_nhgrp_entry(rh, nhg_priv, buffer, sz, w);
969 if (error != 0)
970 break;
971 } CHT_SLIST_FOREACH_END;
972 NHOPS_RUNLOCK(ctl);
973 NET_EPOCH_EXIT(et);
974
975 free(buffer, M_TEMP);
976
977 return (error);
978 }
Cache object: e38c4d103b7556385b8fe16338f39ef9
|