FreeBSD/Linux Kernel Cross Reference
sys/net/route.c
1 /* $OpenBSD: route.c,v 1.416 2023/01/28 10:17:16 mvs Exp $ */
2 /* $NetBSD: route.c,v 1.14 1996/02/13 22:00:46 christos Exp $ */
3
4 /*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright (c) 1980, 1986, 1991, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)route.c 8.2 (Berkeley) 11/15/93
62 */
63
64 /*
65 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995
66 *
67 * NRL grants permission for redistribution and use in source and binary
68 * forms, with or without modification, of the software and documentation
69 * created at NRL provided that the following conditions are met:
70 *
71 * 1. Redistributions of source code must retain the above copyright
72 * notice, this list of conditions and the following disclaimer.
73 * 2. Redistributions in binary form must reproduce the above copyright
74 * notice, this list of conditions and the following disclaimer in the
75 * documentation and/or other materials provided with the distribution.
76 * 3. All advertising materials mentioning features or use of this software
77 * must display the following acknowledgements:
78 * This product includes software developed by the University of
79 * California, Berkeley and its contributors.
80 * This product includes software developed at the Information
81 * Technology Division, US Naval Research Laboratory.
82 * 4. Neither the name of the NRL nor the names of its contributors
83 * may be used to endorse or promote products derived from this software
84 * without specific prior written permission.
85 *
86 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
87 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
88 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
89 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR
90 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
91 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
92 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
93 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
94 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
95 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
96 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
97 *
98 * The views and conclusions contained in the software and documentation
99 * are those of the authors and should not be interpreted as representing
100 * official policies, either expressed or implied, of the US Naval
101 * Research Laboratory (NRL).
102 */
103
104 #include <sys/param.h>
105 #include <sys/systm.h>
106 #include <sys/mbuf.h>
107 #include <sys/socket.h>
108 #include <sys/socketvar.h>
109 #include <sys/timeout.h>
110 #include <sys/domain.h>
111 #include <sys/ioctl.h>
112 #include <sys/kernel.h>
113 #include <sys/queue.h>
114 #include <sys/pool.h>
115 #include <sys/atomic.h>
116
117 #include <net/if.h>
118 #include <net/if_var.h>
119 #include <net/if_dl.h>
120 #include <net/route.h>
121
122 #include <netinet/in.h>
123 #include <netinet/ip_var.h>
124 #include <netinet/in_var.h>
125
126 #ifdef INET6
127 #include <netinet/ip6.h>
128 #include <netinet6/ip6_var.h>
129 #include <netinet6/in6_var.h>
130 #endif
131
132 #ifdef MPLS
133 #include <netmpls/mpls.h>
134 #endif
135
136 #ifdef BFD
137 #include <net/bfd.h>
138 #endif
139
/*
 * Round ``a'' up to the next multiple of sizeof(long); 0 rounds to
 * sizeof(long) so an empty sockaddr still occupies one slot.
 * Fix: parenthesize the first use of ``a'' (it was bare in ``a>0'')
 * so the macro is safe for low-precedence argument expressions.
 */
#define ROUNDUP(a) ((a)>0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
141
/* Give some jitter to hash, to avoid synchronization between routers. */
static uint32_t rt_hashjitter;

/* Presumably defined in the rtable code — verify against rtable.c. */
extern unsigned int rtmap_limit;

struct cpumem * rtcounters;	/* per-CPU routing statistics counters */
int rttrash;		/* routes not in table but not freed */

struct pool rtentry_pool;	/* pool for rtentry structures */
struct pool rttimer_pool;	/* pool for rttimer structures */

/* Internal helpers; see the definitions below for their contracts. */
int	rt_setgwroute(struct rtentry *, u_int);
void	rt_putgwroute(struct rtentry *);
int	rtflushclone1(struct rtentry *, void *, u_int);
int	rtflushclone(struct rtentry *, unsigned int);
int	rt_ifa_purge_walker(struct rtentry *, void *, unsigned int);
struct rtentry *rt_match(struct sockaddr *, uint32_t *, int, unsigned int);
int	rt_clone(struct rtentry **, struct sockaddr *, unsigned int);
struct sockaddr *rt_plentosa(sa_family_t, int, struct sockaddr_in6 *);
static int rt_copysa(struct sockaddr *, struct sockaddr *, struct sockaddr **);

/* Highest route label id handed out — NOTE(review): confirm against
 * rtlabel_name2id(), which is not visible in this chunk. */
#define LABELID_MAX	50000

/* A named route label and its reference-counted numeric id. */
struct rt_label {
	TAILQ_ENTRY(rt_label)	rtl_entry;	/* global list linkage */
	char			rtl_name[RTLABEL_LEN];	/* label string */
	u_int16_t		rtl_id;		/* numeric id */
	int			rtl_ref;	/* reference count */
};

/* Global list of all route labels currently in use. */
TAILQ_HEAD(rt_labels, rt_label) rt_labels = TAILQ_HEAD_INITIALIZER(rt_labels);
173
/*
 * One-time initialization of the routing layer: allocate the per-CPU
 * route statistics counters and the rtentry pool, seed the multipath
 * hash jitter, and initialize BFD when compiled in.
 */
void
route_init(void)
{
	rtcounters = counters_alloc(rts_ncounters);

	pool_init(&rtentry_pool, sizeof(struct rtentry), 0, IPL_MPFLOOR, 0,
	    "rtentry", NULL);

	/* Draw until non-zero: 0 is reserved to mean "uninitialized". */
	while (rt_hashjitter == 0)
		rt_hashjitter = arc4random();

#ifdef BFD
	bfdinit();
#endif
}
189
/*
 * Returns 1 if the (cached) ``rt'' entry is still valid, 0 otherwise.
 *
 * A gateway route is only valid while its cached next hop (rt_gwroute)
 * is itself up; the asserts encode the invariant that a gateway route
 * always carries a resolved, non-gateway next hop.
 */
int
rtisvalid(struct rtentry *rt)
{
	if (rt == NULL)
		return (0);

	if (!ISSET(rt->rt_flags, RTF_UP))
		return (0);

	if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
		KASSERT(rt->rt_gwroute != NULL);
		KASSERT(!ISSET(rt->rt_gwroute->rt_flags, RTF_GATEWAY));
		if (!ISSET(rt->rt_gwroute->rt_flags, RTF_UP))
			return (0);
	}

	return (1);
}
211
/*
 * Do the actual lookup for rtalloc(9), do not use directly!
 *
 * Return the best matching entry for the destination ``dst''; the
 * returned entry is referenced and its use counter bumped.
 *
 * "RT_RESOLVE" means that a corresponding L2 entry should
 * be added to the routing table and resolved (via ARP or
 * NDP), if it does not exist.
 */
struct rtentry *
rt_match(struct sockaddr *dst, uint32_t *src, int flags, unsigned int tableid)
{
	struct rtentry *rt = NULL;

	rt = rtable_match(tableid, dst, src);
	if (rt == NULL) {
		/* No route at all: account the unreachable destination. */
		rtstat_inc(rts_unreach);
		return (NULL);
	}

	/*
	 * Cloning route: spawn the host entry when resolution was
	 * requested.  Failure is not fatal here — on error rt_clone()
	 * leaves ``rt'' pointing at the cloning route itself.
	 */
	if (ISSET(rt->rt_flags, RTF_CLONING) && ISSET(flags, RT_RESOLVE))
		rt_clone(&rt, dst, tableid);

	rt->rt_use++;
	return (rt);
}
238
/*
 * Create and insert an L2 (cloned) entry for ``dst'' derived from the
 * cloning route ``*rtp''.  On success ``*rtp'' is replaced by the new
 * entry (the reference on the old one is dropped); on failure ``*rtp''
 * is left untouched and an RTM_MISS message is broadcast instead.
 */
int
rt_clone(struct rtentry **rtp, struct sockaddr *dst, unsigned int rtableid)
{
	struct rt_addrinfo info;
	struct rtentry *rt = *rtp;
	int error = 0;

	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = dst;

	/*
	 * The priority of cloned route should be different
	 * to avoid conflict with /32 cloning routes.
	 *
	 * It should also be higher to let the ARP layer find
	 * cloned routes instead of the cloning one.
	 */
	KERNEL_LOCK();
	error = rtrequest(RTM_RESOLVE, &info, rt->rt_priority - 1, &rt,
	    rtableid);
	KERNEL_UNLOCK();
	if (error) {
		rtm_miss(RTM_MISS, &info, 0, RTP_NONE, 0, error, rtableid);
	} else {
		/* Inform listeners of the new route */
		rtm_send(rt, RTM_ADD, 0, rtableid);
		rtfree(*rtp);
		*rtp = rt;
	}
	return (error);
}
270
/*
 * Originated from bridge_hash() in if_bridge.c
 *
 * Mix three 32-bit values; each value ends up depending on the other
 * two.  Used below by rt_hash() to build the multipath hash.
 */
#define mix(a, b, c) do {						\
	a -= b; a -= c; a ^= (c >> 13);					\
	b -= c; b -= a; b ^= (a << 8);					\
	c -= a; c -= b; c ^= (b >> 13);					\
	a -= b; a -= c; a ^= (c >> 12);					\
	b -= c; b -= a; b ^= (a << 16);					\
	c -= a; c -= b; c ^= (b >> 5);					\
	a -= b; a -= c; a ^= (c >> 3);					\
	b -= c; b -= a; b ^= (a << 10);					\
	c -= a; c -= b; c ^= (b >> 15);					\
} while (0)
285
/*
 * Compute a 16-bit multipath hash over destination and source
 * addresses, used to pick one entry among several RTF_MPATH routes.
 *
 * Returns -1 when multipath does not apply: no source words given,
 * route invalid or not RTF_MPATH, or multipath disabled for the
 * address family.  ``src'' must provide at least one 32-bit word for
 * IPv4 and four for IPv6 (indexed below as src[0..3]).
 */
int
rt_hash(struct rtentry *rt, struct sockaddr *dst, uint32_t *src)
{
	uint32_t a, b, c;

	if (src == NULL || !rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_MPATH))
		return (-1);

	/* Golden-ratio constant plus per-boot jitter seed. */
	a = b = 0x9e3779b9;
	c = rt_hashjitter;

	switch (dst->sa_family) {
	case AF_INET:
	    {
		struct sockaddr_in *sin;

		if (!ipmultipath)
			return (-1);

		sin = satosin(dst);
		a += sin->sin_addr.s_addr;
		b += src[0];
		mix(a, b, c);
		break;
	    }
#ifdef INET6
	case AF_INET6:
	    {
		struct sockaddr_in6 *sin6;

		if (!ip6_multipath)
			return (-1);

		/* Fold all 128 destination and source bits into the mix. */
		sin6 = satosin6(dst);
		a += sin6->sin6_addr.s6_addr32[0];
		b += sin6->sin6_addr.s6_addr32[2];
		c += src[0];
		mix(a, b, c);
		a += sin6->sin6_addr.s6_addr32[1];
		b += sin6->sin6_addr.s6_addr32[3];
		c += src[1];
		mix(a, b, c);
		a += sin6->sin6_addr.s6_addr32[2];
		b += sin6->sin6_addr.s6_addr32[1];
		c += src[2];
		mix(a, b, c);
		a += sin6->sin6_addr.s6_addr32[3];
		b += sin6->sin6_addr.s6_addr32[0];
		c += src[3];
		mix(a, b, c);
		break;
	    }
#endif /* INET6 */
	}

	/* Other families fall through with just the seeded constants. */
	return (c & 0xffff);
}
343
344 /*
345 * Allocate a route, potentially using multipath to select the peer.
346 */
347 struct rtentry *
348 rtalloc_mpath(struct sockaddr *dst, uint32_t *src, unsigned int rtableid)
349 {
350 return (rt_match(dst, src, RT_RESOLVE, rtableid));
351 }
352
353 /*
354 * Look in the routing table for the best matching entry for
355 * ``dst''.
356 *
357 * If a route with a gateway is found and its next hop is no
358 * longer valid, try to cache it.
359 */
360 struct rtentry *
361 rtalloc(struct sockaddr *dst, int flags, unsigned int rtableid)
362 {
363 return (rt_match(dst, NULL, flags, rtableid));
364 }
365
/*
 * Cache the route entry corresponding to a reachable next hop in
 * the gateway entry ``rt''.
 *
 * Returns 0 on success; ENOENT when no next hop route exists,
 * EHOSTUNREACH when none is usable on rt's interface, ENETUNREACH
 * when the candidate would itself need resolving (loop prevention).
 */
int
rt_setgwroute(struct rtentry *rt, u_int rtableid)
{
	struct rtentry *prt, *nhrt;
	unsigned int rdomain = rtable_l2(rtableid);
	int error;

	NET_ASSERT_LOCKED();

	KASSERT(ISSET(rt->rt_flags, RTF_GATEWAY));

	/* If we cannot find a valid next hop bail. */
	nhrt = rt_match(rt->rt_gateway, NULL, RT_RESOLVE, rdomain);
	if (nhrt == NULL)
		return (ENOENT);

	/* Next hop entry must be on the same interface. */
	if (nhrt->rt_ifidx != rt->rt_ifidx) {
		struct sockaddr_in6 sa_mask;

		/* Only an L2 entry cloned from another subnet can be
		 * re-resolved on the right interface. */
		if (!ISSET(nhrt->rt_flags, RTF_LLINFO) ||
		    !ISSET(nhrt->rt_flags, RTF_CLONED)) {
			rtfree(nhrt);
			return (EHOSTUNREACH);
		}

		/*
		 * We found a L2 entry, so we might have multiple
		 * RTF_CLONING routes for the same subnet.  Query
		 * the first route of the multipath chain and iterate
		 * until we find the correct one.
		 */
		prt = rtable_lookup(rdomain, rt_key(nhrt->rt_parent),
		    rt_plen2mask(nhrt->rt_parent, &sa_mask), NULL, RTP_ANY);
		rtfree(nhrt);

		while (prt != NULL && prt->rt_ifidx != rt->rt_ifidx)
			prt = rtable_iterate(prt);

		/* We found nothing or a non-cloning MPATH route. */
		if (prt == NULL || !ISSET(prt->rt_flags, RTF_CLONING)) {
			rtfree(prt);
			return (EHOSTUNREACH);
		}

		/* Clone a fresh L2 entry on the correct interface. */
		error = rt_clone(&prt, rt->rt_gateway, rdomain);
		if (error) {
			rtfree(prt);
			return (error);
		}
		nhrt = prt;
	}

	/*
	 * Next hop must be reachable, this also prevents rtentry
	 * loops for example when rt->rt_gwroute points to rt.
	 */
	if (ISSET(nhrt->rt_flags, RTF_CLONING|RTF_GATEWAY)) {
		rtfree(nhrt);
		return (ENETUNREACH);
	}

	/* Next hop is valid so remove possible old cache. */
	rt_putgwroute(rt);
	KASSERT(rt->rt_gwroute == NULL);

	/*
	 * If the MTU of next hop is 0, this will reset the MTU of the
	 * route to run PMTUD again from scratch.
	 */
	if (!ISSET(rt->rt_locks, RTV_MTU) && (rt->rt_mtu > nhrt->rt_mtu))
		rt->rt_mtu = nhrt->rt_mtu;

	/*
	 * To avoid reference counting problems when writing link-layer
	 * addresses in an outgoing packet, we ensure that the lifetime
	 * of a cached entry is greater than the bigger lifetime of the
	 * gateway entries it is pointed by.
	 */
	nhrt->rt_flags |= RTF_CACHED;
	nhrt->rt_cachecnt++;

	/* The rt_match() reference is transferred to rt_gwroute. */
	rt->rt_gwroute = nhrt;

	return (0);
}
456
/*
 * Invalidate the cached route entry of the gateway entry ``rt''.
 *
 * Drops the RTF_CACHED marker once no gateway entry points at the
 * next hop anymore, then releases the cached reference.
 */
void
rt_putgwroute(struct rtentry *rt)
{
	struct rtentry *nhrt = rt->rt_gwroute;

	NET_ASSERT_LOCKED();

	if (!ISSET(rt->rt_flags, RTF_GATEWAY) || nhrt == NULL)
		return;

	KASSERT(ISSET(nhrt->rt_flags, RTF_CACHED));
	KASSERT(nhrt->rt_cachecnt > 0);

	/* Last gateway entry pointing at nhrt clears the marker. */
	--nhrt->rt_cachecnt;
	if (nhrt->rt_cachecnt == 0)
		nhrt->rt_flags &= ~RTF_CACHED;

	rtfree(rt->rt_gwroute);
	rt->rt_gwroute = NULL;
}
480
/*
 * Take a reference on ``rt''; release with rtfree().
 */
void
rtref(struct rtentry *rt)
{
	refcnt_take(&rt->rt_refcnt);
}
486
/*
 * Release a reference on ``rt'' and destroy it when the last one is
 * dropped.  Destruction only happens for entries already removed from
 * the table (!RTF_UP, not the root node); timers, the ifaddr
 * reference, the label, MPLS state and the gateway/destination
 * sockaddrs are released under the kernel lock.  NULL is a no-op.
 */
void
rtfree(struct rtentry *rt)
{
	if (rt == NULL)
		return;

	/* refcnt_rele() returns non-zero once the last reference is gone. */
	if (refcnt_rele(&rt->rt_refcnt) == 0)
		return;

	KASSERT(!ISSET(rt->rt_flags, RTF_UP));
	KASSERT(!RT_ROOT(rt));
	atomic_dec_int(&rttrash);

	KERNEL_LOCK();
	rt_timer_remove_all(rt);
	ifafree(rt->rt_ifa);
	rtlabel_unref(rt->rt_labelid);
#ifdef MPLS
	rt_mpls_clear(rt);
#endif
	free(rt->rt_gateway, M_RTABLE, ROUNDUP(rt->rt_gateway->sa_len));
	free(rt_key(rt), M_RTABLE, rt_key(rt)->sa_len);
	KERNEL_UNLOCK();

	pool_put(&rtentry_pool, rt);
}
513
/*
 * Take a reference on ``ifa''; the pointer is returned for caller
 * convenience.  Release with ifafree().
 */
struct ifaddr *
ifaref(struct ifaddr *ifa)
{
	refcnt_take(&ifa->ifa_refcnt);
	return ifa;
}
520
521 void
522 ifafree(struct ifaddr *ifa)
523 {
524 if (refcnt_rele(&ifa->ifa_refcnt) == 0)
525 return;
526 free(ifa, M_IFADDR, 0);
527 }
528
/*
 * Force a routing table entry to the specified
 * destination to go through the given gateway.
 * Normally called as a result of a routing redirect
 * message from the network layer.
 *
 * ``src'' is the advertised author of the redirect; on success and
 * when ``rtp'' is non-NULL the affected (referenced) route is handed
 * back through it.  An RTM_REDIRECT message is always emitted.
 */
void
rtredirect(struct sockaddr *dst, struct sockaddr *gateway,
    struct sockaddr *src, struct rtentry **rtp, unsigned int rdomain)
{
	struct rtentry *rt;
	int error = 0;
	enum rtstat_counters stat = rts_ncounters;	/* sentinel: none */
	struct rt_addrinfo info;
	struct ifaddr *ifa;
	unsigned int ifidx = 0;
	int flags = RTF_GATEWAY|RTF_HOST;
	uint8_t prio = RTP_NONE;

	NET_ASSERT_LOCKED();

	/* verify the gateway is directly reachable */
	rt = rtalloc(gateway, 0, rdomain);
	if (!rtisvalid(rt) || ISSET(rt->rt_flags, RTF_GATEWAY)) {
		rtfree(rt);
		error = ENETUNREACH;
		goto out;
	}
	/* NOTE(review): ifa is used below without an extra reference;
	 * presumably the net lock keeps it alive — confirm. */
	ifidx = rt->rt_ifidx;
	ifa = rt->rt_ifa;
	rtfree(rt);
	rt = NULL;

	rt = rtable_lookup(rdomain, dst, NULL, NULL, RTP_ANY);
	/*
	 * If the redirect isn't from our current router for this dst,
	 * it's either old or wrong.  If it redirects us to ourselves,
	 * we have a routing loop, perhaps as a result of an interface
	 * going down recently.
	 */
#define	equal(a1, a2) \
	((a1)->sa_len == (a2)->sa_len && \
	bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0)
	if (rt != NULL && (!equal(src, rt->rt_gateway) || rt->rt_ifa != ifa))
		error = EINVAL;
	else if (ifa_ifwithaddr(gateway, rdomain) != NULL ||
	    (gateway->sa_family == AF_INET &&
	    in_broadcast(satosin(gateway)->sin_addr, rdomain)))
		error = EHOSTUNREACH;
	if (error)
		goto done;
	/*
	 * Create a new entry if we just got back a wildcard entry
	 * or the lookup failed.  This is necessary for hosts
	 * which use routing redirects generated by smart gateways
	 * to dynamically build the routing tables.
	 */
	if (rt == NULL)
		goto create;
	/*
	 * Don't listen to the redirect if it's
	 * for a route to an interface.
	 */
	if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
		if (!ISSET(rt->rt_flags, RTF_HOST)) {
			/*
			 * Changing from route to net => route to host.
			 * Create new route, rather than smashing route to net.
			 */
create:
			rtfree(rt);
			flags |= RTF_DYNAMIC;
			bzero(&info, sizeof(info));
			info.rti_info[RTAX_DST] = dst;
			info.rti_info[RTAX_GATEWAY] = gateway;
			info.rti_ifa = ifa;
			info.rti_flags = flags;
			rt = NULL;
			error = rtrequest(RTM_ADD, &info, RTP_DEFAULT, &rt,
			    rdomain);
			if (error == 0) {
				flags = rt->rt_flags;
				prio = rt->rt_priority;
			}
			stat = rts_dynamic;
		} else {
			/*
			 * Smash the current notion of the gateway to
			 * this destination.  Should check about netmask!!!
			 */
			rt->rt_flags |= RTF_MODIFIED;
			flags |= RTF_MODIFIED;
			prio = rt->rt_priority;
			stat = rts_newgateway;
			rt_setgate(rt, gateway, rdomain);
		}
	} else
		error = EHOSTUNREACH;
done:
	if (rt) {
		/* Hand the reference to the caller or drop it. */
		if (rtp && !error)
			*rtp = rt;
		else
			rtfree(rt);
	}
out:
	if (error)
		rtstat_inc(rts_badredirect);
	else if (stat != rts_ncounters)
		rtstat_inc(stat);
	/* Always notify routing-socket listeners of the redirect. */
	bzero((caddr_t)&info, sizeof(info));
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_GATEWAY] = gateway;
	info.rti_info[RTAX_AUTHOR] = src;
	rtm_miss(RTM_REDIRECT, &info, flags, prio, ifidx, error, rdomain);
}
645
/*
 * Delete a route and generate a message
 *
 * ``rt'' must be attached to ``ifp''; on success (error == 0) the
 * caller's reference is dropped here.  An RTM_DELETE message is
 * emitted in either case, carrying the route's attributes as they
 * were before deletion.
 */
int
rtdeletemsg(struct rtentry *rt, struct ifnet *ifp, u_int tableid)
{
	int error;
	struct rt_addrinfo info;
	struct sockaddr_rtlabel sa_rl;
	struct sockaddr_in6 sa_mask;

	KASSERT(rt->rt_ifidx == ifp->if_index);

	/*
	 * Request the new route so that the entry is not actually
	 * deleted.  That will allow the information being reported to
	 * be accurate (and consistent with route_output()).
	 */
	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	if (!ISSET(rt->rt_flags, RTF_HOST))
		info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
	info.rti_flags = rt->rt_flags;
	info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
	info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
	error = rtrequest_delete(&info, rt->rt_priority, ifp, &rt, tableid);
	rtm_miss(RTM_DELETE, &info, info.rti_flags, rt->rt_priority,
	    rt->rt_ifidx, error, tableid);
	if (error == 0)
		rtfree(rt);
	return (error);
}
680
681 static inline int
682 rtequal(struct rtentry *a, struct rtentry *b)
683 {
684 if (a == b)
685 return 1;
686
687 if (memcmp(rt_key(a), rt_key(b), rt_key(a)->sa_len) == 0 &&
688 rt_plen(a) == rt_plen(b))
689 return 1;
690 else
691 return 0;
692 }
693
/*
 * rtable_walk() callback used by rtflushclone(): return EEXIST to
 * stop the walk at a cloned child of ``arg'' (the caller then deletes
 * it and restarts), 0 to keep walking.
 */
int
rtflushclone1(struct rtentry *rt, void *arg, u_int id)
{
	struct rtentry *cloningrt = arg;
	struct ifnet *ifp;

	if (!ISSET(rt->rt_flags, RTF_CLONED))
		return 0;

	/* Cached route must stay alive as long as their parent are alive. */
	if (ISSET(rt->rt_flags, RTF_CACHED) && (rt->rt_parent != cloningrt))
		return 0;

	/* Only children of this particular cloning route are flushed. */
	if (!rtequal(rt->rt_parent, cloningrt))
		return 0;
	/*
	 * This happens when an interface with a RTF_CLONING route is
	 * being detached.  In this case it's safe to bail because all
	 * the routes are being purged by rt_ifa_purge().
	 */
	ifp = if_get(rt->rt_ifidx);
	if (ifp == NULL)
		return 0;

	if_put(ifp);
	return EEXIST;
}
721
/*
 * Delete every route cloned from ``parent'' in table ``rtableid''.
 * Each deletion invalidates the walk, so the table is re-walked
 * (EAGAIN) until no cloned child remains.
 */
int
rtflushclone(struct rtentry *parent, unsigned int rtableid)
{
	struct rtentry *rt = NULL;
	struct ifnet *ifp;
	int error;

#ifdef DIAGNOSTIC
	if (!parent || (parent->rt_flags & RTF_CLONING) == 0)
		panic("rtflushclone: called with a non-cloning route");
#endif

	do {
		/* rtflushclone1() stops the walk on a matching child. */
		error = rtable_walk(rtableid, rt_key(parent)->sa_family, &rt,
		    rtflushclone1, parent);
		if (rt != NULL && error == EEXIST) {
			ifp = if_get(rt->rt_ifidx);
			if (ifp == NULL) {
				error = EAGAIN;
			} else {
				error = rtdeletemsg(rt, ifp, rtableid);
				if (error == 0)
					error = EAGAIN;
				if_put(ifp);
			}
		}
		rtfree(rt);
		rt = NULL;
	} while (error == EAGAIN);

	return error;

}
755
/*
 * Remove the route described by ``info'' from table ``tableid''.
 * When ``ifp'' is given the route must be attached to it.  On success
 * the (now detached) entry is handed back through ``ret_nrt'' if
 * non-NULL, otherwise freed.
 */
int
rtrequest_delete(struct rt_addrinfo *info, u_int8_t prio, struct ifnet *ifp,
    struct rtentry **ret_nrt, u_int tableid)
{
	struct rtentry *rt;
	int error;

	NET_ASSERT_LOCKED();

	if (!rtable_exists(tableid))
		return (EAFNOSUPPORT);
	rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], prio);
	if (rt == NULL)
		return (ESRCH);

	/* Make sure that's the route the caller want to delete. */
	if (ifp != NULL && ifp->if_index != rt->rt_ifidx) {
		rtfree(rt);
		return (ESRCH);
	}

#ifdef BFD
	if (ISSET(rt->rt_flags, RTF_BFD))
		bfdclear(rt);
#endif

	error = rtable_delete(tableid, info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], rt);
	if (error != 0) {
		rtfree(rt);
		return (ESRCH);
	}

	/* Release next hop cache before flushing cloned entries. */
	rt_putgwroute(rt);

	/* Clean up any cloned children. */
	if (ISSET(rt->rt_flags, RTF_CLONING))
		rtflushclone(rt, tableid);

	rtfree(rt->rt_parent);
	rt->rt_parent = NULL;

	rt->rt_flags &= ~RTF_UP;

	/* NOTE(review): ifp is dereferenced unconditionally here although
	 * the check above tolerates ifp == NULL — callers appear to always
	 * pass a valid ifp; confirm before relying on NULL. */
	KASSERT(ifp->if_index == rt->rt_ifidx);
	ifp->if_rtrequest(ifp, RTM_DELETE, rt);

	atomic_inc_int(&rttrash);

	if (ret_nrt != NULL)
		*ret_nrt = rt;
	else
		rtfree(rt);

	return (0);
}
814
/*
 * Add (RTM_ADD) or clone (RTM_RESOLVE) a route described by ``info''
 * in table ``tableid''.  RTM_DELETE is rejected here — use
 * rtrequest_delete().  On success the new (referenced) entry is
 * handed back through ``ret_nrt'' if non-NULL.  ``prio'' == 0 selects
 * a default priority derived from the interface.
 */
int
rtrequest(int req, struct rt_addrinfo *info, u_int8_t prio,
    struct rtentry **ret_nrt, u_int tableid)
{
	struct ifnet *ifp;
	struct rtentry *rt, *crt;
	struct ifaddr *ifa;
	struct sockaddr *ndst;
	struct sockaddr_rtlabel *sa_rl, sa_rl2;
	struct sockaddr_dl sa_dl = { sizeof(sa_dl), AF_LINK };
	int error;

	NET_ASSERT_LOCKED();

	if (!rtable_exists(tableid))
		return (EAFNOSUPPORT);
	if (info->rti_flags & RTF_HOST)
		info->rti_info[RTAX_NETMASK] = NULL;
	switch (req) {
	case RTM_DELETE:
		return (EINVAL);

	case RTM_RESOLVE:
		/* Derive the request from the parent cloning route. */
		if (ret_nrt == NULL || (rt = *ret_nrt) == NULL)
			return (EINVAL);
		if ((rt->rt_flags & RTF_CLONING) == 0)
			return (EINVAL);
		KASSERT(rt->rt_ifa->ifa_ifp != NULL);
		info->rti_ifa = rt->rt_ifa;
		info->rti_flags = rt->rt_flags | (RTF_CLONED|RTF_HOST);
		info->rti_flags &= ~(RTF_CLONING|RTF_CONNECTED|RTF_STATIC);
		info->rti_info[RTAX_GATEWAY] = sdltosa(&sa_dl);
		info->rti_info[RTAX_LABEL] =
		    rtlabel_id2sa(rt->rt_labelid, &sa_rl2);
		/* FALLTHROUGH */

	case RTM_ADD:
		if (info->rti_ifa == NULL)
			return (EINVAL);
		ifa = info->rti_ifa;
		ifp = ifa->ifa_ifp;
		if (prio == 0)
			prio = ifp->if_priority + RTP_STATIC;

		/* Allocate a masked copy of the destination key. */
		error = rt_copysa(info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK], &ndst);
		if (error)
			return (error);

		rt = pool_get(&rtentry_pool, PR_NOWAIT | PR_ZERO);
		if (rt == NULL) {
			free(ndst, M_RTABLE, ndst->sa_len);
			return (ENOBUFS);
		}

		refcnt_init(&rt->rt_refcnt);
		rt->rt_flags = info->rti_flags | RTF_UP;
		rt->rt_priority = prio;	/* init routing priority */
		LIST_INIT(&rt->rt_timer);

		/* Check the link state if the table supports it. */
		if (rtable_mpath_capable(tableid, ndst->sa_family) &&
		    !ISSET(rt->rt_flags, RTF_LOCAL) &&
		    (!LINK_STATE_IS_UP(ifp->if_link_state) ||
		    !ISSET(ifp->if_flags, IFF_UP))) {
			rt->rt_flags &= ~RTF_UP;
			rt->rt_priority |= RTP_DOWN;
		}

		if (info->rti_info[RTAX_LABEL] != NULL) {
			sa_rl = (struct sockaddr_rtlabel *)
			    info->rti_info[RTAX_LABEL];
			rt->rt_labelid = rtlabel_name2id(sa_rl->sr_label);
		}

#ifdef MPLS
		/* We have to allocate additional space for MPLS infos */
		if (info->rti_flags & RTF_MPLS &&
		    (info->rti_info[RTAX_SRC] != NULL ||
		    info->rti_info[RTAX_DST]->sa_family == AF_MPLS)) {
			error = rt_mpls_set(rt, info->rti_info[RTAX_SRC],
			    info->rti_mpls);
			if (error) {
				free(ndst, M_RTABLE, ndst->sa_len);
				pool_put(&rtentry_pool, rt);
				return (error);
			}
		} else
			rt_mpls_clear(rt);
#endif

		rt->rt_ifa = ifaref(ifa);
		rt->rt_ifidx = ifp->if_index;
		/*
		 * Copy metrics and a back pointer from the cloned
		 * route's parent.
		 */
		if (ISSET(rt->rt_flags, RTF_CLONED)) {
			rtref(*ret_nrt);
			rt->rt_parent = *ret_nrt;
			rt->rt_rmx = (*ret_nrt)->rt_rmx;
		}

		/*
		 * We must set rt->rt_gateway before adding ``rt'' to
		 * the routing table because the radix MPATH code use
		 * it to (re)order routes.
		 */
		if ((error = rt_setgate(rt, info->rti_info[RTAX_GATEWAY],
		    tableid))) {
			/* Unwind everything taken so far, in order. */
			ifafree(ifa);
			rtfree(rt->rt_parent);
			rt_putgwroute(rt);
			free(rt->rt_gateway, M_RTABLE,
			    ROUNDUP(rt->rt_gateway->sa_len));
			free(ndst, M_RTABLE, ndst->sa_len);
			pool_put(&rtentry_pool, rt);
			return (error);
		}

		error = rtable_insert(tableid, ndst,
		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
		    rt->rt_priority, rt);
		if (error != 0 &&
		    (crt = rtable_match(tableid, ndst, NULL)) != NULL) {
			/* overwrite cloned route */
			if (ISSET(crt->rt_flags, RTF_CLONED) &&
			    !ISSET(crt->rt_flags, RTF_CACHED)) {
				struct ifnet *cifp;

				cifp = if_get(crt->rt_ifidx);
				KASSERT(cifp != NULL);
				rtdeletemsg(crt, cifp, tableid);
				if_put(cifp);

				/* Retry once the stale clone is gone. */
				error = rtable_insert(tableid, ndst,
				    info->rti_info[RTAX_NETMASK],
				    info->rti_info[RTAX_GATEWAY],
				    rt->rt_priority, rt);
			}
			rtfree(crt);
		}
		if (error != 0) {
			/* Same unwind as the rt_setgate() failure above. */
			ifafree(ifa);
			rtfree(rt->rt_parent);
			rt_putgwroute(rt);
			free(rt->rt_gateway, M_RTABLE,
			    ROUNDUP(rt->rt_gateway->sa_len));
			free(ndst, M_RTABLE, ndst->sa_len);
			pool_put(&rtentry_pool, rt);
			return (EEXIST);
		}
		ifp->if_rtrequest(ifp, req, rt);

		if_group_routechange(info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK]);

		if (ret_nrt != NULL)
			*ret_nrt = rt;
		else
			rtfree(rt);
		break;
	}

	return (0);
}
981
/*
 * Install ``gate'' as the gateway address of ``rt'', (re)allocating
 * the rt_gateway sockaddr when the rounded size changes.  For
 * RTF_GATEWAY routes the next hop cache is then (re)established via
 * rt_setgwroute().
 */
int
rt_setgate(struct rtentry *rt, struct sockaddr *gate, u_int rtableid)
{
	int glen = ROUNDUP(gate->sa_len);
	struct sockaddr *sa;

	if (rt->rt_gateway == NULL || glen != ROUNDUP(rt->rt_gateway->sa_len)) {
		sa = malloc(glen, M_RTABLE, M_NOWAIT);
		if (sa == NULL)
			return (ENOBUFS);
		if (rt->rt_gateway != NULL) {
			free(rt->rt_gateway, M_RTABLE,
			    ROUNDUP(rt->rt_gateway->sa_len));
		}
		rt->rt_gateway = sa;
	}
	/* NOTE(review): copies ROUNDUP(sa_len) bytes, i.e. may read past
	 * ``gate'' up to the rounding boundary — assumes gateway
	 * sockaddrs are allocated roundup-sized; confirm at call sites. */
	memmove(rt->rt_gateway, gate, glen);

	if (ISSET(rt->rt_flags, RTF_GATEWAY))
		return (rt_setgwroute(rt, rtableid));

	return (0);
}
1005
1006 /*
1007 * Return the route entry containing the next hop link-layer
1008 * address corresponding to ``rt''.
1009 */
1010 struct rtentry *
1011 rt_getll(struct rtentry *rt)
1012 {
1013 if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
1014 KASSERT(rt->rt_gwroute != NULL);
1015 return (rt->rt_gwroute);
1016 }
1017
1018 return (rt);
1019 }
1020
/*
 * Copy ``src'' into ``dst'' while applying ``netmask'': address bytes
 * covered by the mask are ANDed in, the remainder of the destination
 * (up to the source length) is zeroed.  The first two bytes
 * (sa_len/sa_family) are copied verbatim; the corresponding mask
 * bytes are skipped.
 */
void
rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst,
    struct sockaddr *netmask)
{
	unsigned char *s = (unsigned char *)src;
	unsigned char *d = (unsigned char *)dst;
	unsigned char *m = (unsigned char *)netmask;
	unsigned int i, end, srclen;

	/* The masked copy stops at the shorter of the two lengths. */
	srclen = s[0];
	end = m[0];
	if (end > srclen)
		end = srclen;

	/* sa_len and sa_family are never masked. */
	d[0] = s[0];
	d[1] = s[1];

	for (i = 2; i < end; i++)
		d[i] = s[i] & m[i];
	for (; i < srclen; i++)
		d[i] = 0;
}
1040
/*
 * allocate new sockaddr structure based on the user supplied src and mask
 * that is useable for the routing table.
 *
 * The result (*dst, owned by the caller, freed with free(9)) is a
 * zeroed, domain-sized sockaddr whose address bits past the prefix
 * length derived from ``mask'' are cleared.  Returns EAFNOSUPPORT for
 * families without a routing domain, EINVAL for a short ``src'' or an
 * unusable mask, ENOBUFS on allocation failure.
 */
static int
rt_copysa(struct sockaddr *src, struct sockaddr *mask, struct sockaddr **dst)
{
	/* maskarray[r]: mask keeping the top ``r'' bits of a byte. */
	static const u_char maskarray[] = {
	    0x0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
	struct sockaddr *ndst;
	const struct domain *dp;
	u_char *csrc, *cdst;
	int i, plen;

	/* Find the routing-capable domain for this address family. */
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;
		if (src->sa_family == dp->dom_family)
			break;
	}
	if (dp == NULL)
		return (EAFNOSUPPORT);

	if (src->sa_len < dp->dom_sasize)
		return (EINVAL);

	plen = rtable_satoplen(src->sa_family, mask);
	if (plen == -1)
		return (EINVAL);

	ndst = malloc(dp->dom_sasize, M_RTABLE, M_NOWAIT|M_ZERO);
	if (ndst == NULL)
		return (ENOBUFS);

	ndst->sa_family = src->sa_family;
	ndst->sa_len = dp->dom_sasize;

	csrc = (u_char *)src + dp->dom_rtoffset;
	cdst = (u_char *)ndst + dp->dom_rtoffset;

	/* Copy whole prefix bytes, then mask the trailing partial byte. */
	memcpy(cdst, csrc, plen / 8);
	if (plen % 8 != 0)
		cdst[plen / 8] = csrc[plen / 8] & maskarray[plen % 8];

	*dst = ndst;
	return (0);
}
1088
/*
 * Install a route for address ``ifa'' on its interface: local,
 * broadcast, connected-subnet or link-layer routes depending on
 * ``flags''.  On success an RTM_ADD (and, for RTF_LOCAL, an
 * RTM_NEWADDR) message is sent to routing-socket listeners.
 */
int
rt_ifa_add(struct ifaddr *ifa, int flags, struct sockaddr *dst,
    unsigned int rdomain)
{
	struct ifnet *ifp = ifa->ifa_ifp;
	struct rtentry *rt;
	struct sockaddr_rtlabel sa_rl;
	struct rt_addrinfo info;
	uint8_t prio = ifp->if_priority + RTP_STATIC;
	int error;

	KASSERT(rdomain == rtable_l2(rdomain));

	memset(&info, 0, sizeof(info));
	info.rti_ifa = ifa;
	info.rti_flags = flags;
	info.rti_info[RTAX_DST] = dst;
	/* L2 routes point at the interface's link-layer address. */
	if (flags & RTF_LLINFO)
		info.rti_info[RTAX_GATEWAY] = sdltosa(ifp->if_sadl);
	else
		info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(ifp->if_rtlabelid, &sa_rl);

#ifdef MPLS
	if ((flags & RTF_MPLS) == RTF_MPLS)
		info.rti_mpls = MPLS_OP_POP;
#endif /* MPLS */

	if ((flags & RTF_HOST) == 0)
		info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;

	/* Local/broadcast routes get the highest priority. */
	if (flags & (RTF_LOCAL|RTF_BROADCAST))
		prio = RTP_LOCAL;

	if (flags & RTF_CONNECTED)
		prio = ifp->if_priority + RTP_CONNECTED;

	error = rtrequest(RTM_ADD, &info, prio, &rt, rdomain);
	if (error == 0) {
		/*
		 * A local route is created for every address configured
		 * on an interface, so use this information to notify
		 * userland that a new address has been added.
		 */
		if (flags & RTF_LOCAL)
			rtm_addr(RTM_NEWADDR, ifa);
		rtm_send(rt, RTM_ADD, 0, rdomain);
		rtfree(rt);
	}
	return (error);
}
1140
/*
 * Remove the route for address `dst' owned by `ifa' from rtable
 * `rdomain'; counterpart of rt_ifa_add().
 */
int
rt_ifa_del(struct ifaddr *ifa, int flags, struct sockaddr *dst,
    unsigned int rdomain)
{
	struct ifnet *ifp = ifa->ifa_ifp;
	struct rtentry *rt;
	struct mbuf *m = NULL;
	struct sockaddr *deldst;
	struct rt_addrinfo info;
	struct sockaddr_rtlabel sa_rl;
	uint8_t prio = ifp->if_priority + RTP_STATIC;
	int error;

	/* Caller must pass a routing domain, not an arbitrary rtable id. */
	KASSERT(rdomain == rtable_l2(rdomain));

	/*
	 * Network routes are keyed on the masked destination; build that
	 * in a temporary mbuf so the lookup below matches the entry.
	 */
	if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
		m = m_get(M_DONTWAIT, MT_SONAME);
		if (m == NULL)
			return (ENOBUFS);
		deldst = mtod(m, struct sockaddr *);
		rt_maskedcopy(dst, deldst, ifa->ifa_netmask);
		dst = deldst;
	}

	memset(&info, 0, sizeof(info));
	info.rti_ifa = ifa;
	info.rti_flags = flags;
	info.rti_info[RTAX_DST] = dst;
	if ((flags & RTF_LLINFO) == 0)
		info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(ifp->if_rtlabelid, &sa_rl);

	if ((flags & RTF_HOST) == 0)
		info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;

	/* Mirror the priority selection of rt_ifa_add(). */
	if (flags & (RTF_LOCAL|RTF_BROADCAST))
		prio = RTP_LOCAL;

	if (flags & RTF_CONNECTED)
		prio = ifp->if_priority + RTP_CONNECTED;

	/* NOTE(review): presumably drops the rdomain's cached source
	 * address selection for this address — confirm against
	 * rtable_clearsource() in rtable.c. */
	rtable_clearsource(rdomain, ifa->ifa_addr);
	error = rtrequest_delete(&info, prio, ifp, &rt, rdomain);
	if (error == 0) {
		rtm_send(rt, RTM_DELETE, 0, rdomain);
		/* Tell userland the address itself is going away too. */
		if (flags & RTF_LOCAL)
			rtm_addr(RTM_DELADDR, ifa);
		rtfree(rt);
	}
	m_free(m);

	return (error);
}
1194
/*
 * Add ifa's address as a local rtentry.
 * Returns 0 if the route already existed or was added, else an errno.
 */
int
rt_ifa_addlocal(struct ifaddr *ifa)
{
	struct ifnet *ifp = ifa->ifa_ifp;
	struct rtentry *rt;
	u_int flags = RTF_HOST|RTF_LOCAL;
	int error = 0;

	/*
	 * If the configured address correspond to the magical "any"
	 * address do not add a local route entry because that might
	 * corrupt the routing tree which uses this value for the
	 * default routes.
	 */
	switch (ifa->ifa_addr->sa_family) {
	case AF_INET:
		if (satosin(ifa->ifa_addr)->sin_addr.s_addr == INADDR_ANY)
			return (0);
		break;
#ifdef INET6
	case AF_INET6:
		if (IN6_ARE_ADDR_EQUAL(&satosin6(ifa->ifa_addr)->sin6_addr,
		    &in6addr_any))
			return (0);
		break;
#endif
	default:
		break;
	}

	/* Non-loopback, non-point-to-point interfaces also get RTF_LLINFO. */
	if (!ISSET(ifp->if_flags, (IFF_LOOPBACK|IFF_POINTOPOINT)))
		flags |= RTF_LLINFO;

	/* If there is no local entry, allocate one. */
	rt = rtalloc(ifa->ifa_addr, 0, ifp->if_rdomain);
	if (rt == NULL || ISSET(rt->rt_flags, flags) != flags) {
		error = rt_ifa_add(ifa, flags | RTF_MPATH, ifa->ifa_addr,
		    ifp->if_rdomain);
	}
	rtfree(rt);

	return (error);
}
1241
/*
 * Remove local rtentry of ifa's address if it exists.
 * Returns 0 if nothing had to be done or on success, else an errno.
 */
int
rt_ifa_dellocal(struct ifaddr *ifa)
{
	struct ifnet *ifp = ifa->ifa_ifp;
	struct rtentry *rt;
	u_int flags = RTF_HOST|RTF_LOCAL;
	int error = 0;

	/*
	 * We do not add local routes for such address, so do not bother
	 * removing them.
	 */
	switch (ifa->ifa_addr->sa_family) {
	case AF_INET:
		if (satosin(ifa->ifa_addr)->sin_addr.s_addr == INADDR_ANY)
			return (0);
		break;
#ifdef INET6
	case AF_INET6:
		if (IN6_ARE_ADDR_EQUAL(&satosin6(ifa->ifa_addr)->sin6_addr,
		    &in6addr_any))
			return (0);
		break;
#endif
	default:
		break;
	}

	/* Must match the flags rt_ifa_addlocal() used when inserting. */
	if (!ISSET(ifp->if_flags, (IFF_LOOPBACK|IFF_POINTOPOINT)))
		flags |= RTF_LLINFO;

	/*
	 * Before deleting, check if a corresponding local host
	 * route surely exists. With this check, we can avoid to
	 * delete an interface direct route whose destination is same
	 * as the address being removed. This can happen when removing
	 * a subnet-router anycast address on an interface attached
	 * to a shared medium.
	 */
	rt = rtalloc(ifa->ifa_addr, 0, ifp->if_rdomain);
	if (rt != NULL && ISSET(rt->rt_flags, flags) == flags) {
		error = rt_ifa_del(ifa, flags, ifa->ifa_addr,
		    ifp->if_rdomain);
	}
	rtfree(rt);

	return (error);
}
1293
/*
 * Remove all addresses attached to ``ifa''.
 */
void
rt_ifa_purge(struct ifaddr *ifa)
{
	struct ifnet *ifp = ifa->ifa_ifp;
	struct rtentry *rt = NULL;
	unsigned int rtableid;
	int error, af = ifa->ifa_addr->sa_family;

	KASSERT(ifp != NULL);

	for (rtableid = 0; rtableid < rtmap_limit; rtableid++) {
		/* skip rtables that are not in the rdomain of the ifp */
		if (rtable_l2(rtableid) != ifp->if_rdomain)
			continue;

		do {
			/*
			 * The walker flags routes owned by `ifa' with
			 * EEXIST; they are deleted here and the walk is
			 * restarted (EAGAIN) until none remain.
			 */
			error = rtable_walk(rtableid, af, &rt,
			    rt_ifa_purge_walker, ifa);
			if (rt != NULL && error == EEXIST) {
				error = rtdeletemsg(rt, ifp, rtableid);
				if (error == 0)
					error = EAGAIN;
			}
			rtfree(rt);
			rt = NULL;
		} while (error == EAGAIN);

		/* No routes of this family in the table is not an error. */
		if (error == EAFNOSUPPORT)
			error = 0;

		if (error)
			break;
	}
}
1331
1332 int
1333 rt_ifa_purge_walker(struct rtentry *rt, void *vifa, unsigned int rtableid)
1334 {
1335 struct ifaddr *ifa = vifa;
1336
1337 if (rt->rt_ifa == ifa)
1338 return EEXIST;
1339
1340 return 0;
1341 }
1342
1343 /*
1344 * Route timer routines. These routes allow functions to be called
1345 * for various routes at any time. This is useful in supporting
1346 * path MTU discovery and redirect route deletion.
1347 *
1348 * This is similar to some BSDI internal functions, but it provides
1349 * for multiple queues for efficiency's sake...
1350 */
1351
/*
 * Locking annotations used below:
 *	[T]	protected by the global rttimer_mtx
 *	[I]	immutable after the timer has been set up
 */
struct mutex rttimer_mtx;

struct rttimer {
	TAILQ_ENTRY(rttimer)	rtt_next;	/* [T] entry on timer queue */
	LIST_ENTRY(rttimer)	rtt_link;	/* [T] timers per rtentry */
	struct timeout		rtt_timeout;	/* [I] timeout for this entry */
	struct rttimer_queue	*rtt_queue;	/* [I] back pointer to queue */
	struct rtentry		*rtt_rt;	/* [T] back pointer to route */
	time_t			rtt_expire;	/* [I] rt expire time */
	u_int			rtt_tableid;	/* [I] rtable id of rtt_rt */
};
1363
/*
 * Run the action of an expired/flushed timer: the queue's callback if
 * one was registered, otherwise the default action of deleting dynamic
 * host routes (e.g. ICMP redirects).  Called without rttimer_mtx held.
 */
#define RTTIMER_CALLOUT(r)	{					\
	if (r->rtt_queue->rtq_func != NULL) {				\
		(*r->rtt_queue->rtq_func)(r->rtt_rt, r->rtt_tableid);	\
	} else {							\
		struct ifnet *ifp;					\
									\
		ifp = if_get(r->rtt_rt->rt_ifidx);			\
		if (ifp != NULL &&					\
		    (r->rtt_rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) ==	\
		    (RTF_DYNAMIC|RTF_HOST))				\
			rtdeletemsg(r->rtt_rt, ifp, r->rtt_tableid);	\
		if_put(ifp);						\
	}								\
}
1378
1379 /*
1380 * Some subtle order problems with domain initialization mean that
1381 * we cannot count on this being run from rt_init before various
1382 * protocol initializations are done. Therefore, we make sure
1383 * that this is run when the first queue is added...
1384 */
1385
1386 void
1387 rt_timer_init(void)
1388 {
1389 pool_init(&rttimer_pool, sizeof(struct rttimer), 0,
1390 IPL_MPFLOOR, 0, "rttmr", NULL);
1391 mtx_init(&rttimer_mtx, IPL_MPFLOOR);
1392 }
1393
1394 void
1395 rt_timer_queue_init(struct rttimer_queue *rtq, int timeout,
1396 void (*func)(struct rtentry *, u_int))
1397 {
1398 rtq->rtq_timeout = timeout;
1399 rtq->rtq_count = 0;
1400 rtq->rtq_func = func;
1401 TAILQ_INIT(&rtq->rtq_head);
1402 }
1403
/*
 * Change the timeout (seconds) used for timers subsequently added to
 * `rtq'; timeouts that are already armed are not rescheduled.
 */
void
rt_timer_queue_change(struct rttimer_queue *rtq, int timeout)
{
	mtx_enter(&rttimer_mtx);
	rtq->rtq_timeout = timeout;
	mtx_leave(&rttimer_mtx);
}
1411
/*
 * Expire every timer on `rtq' immediately: run each timer's action and
 * free it.
 */
void
rt_timer_queue_flush(struct rttimer_queue *rtq)
{
	struct rttimer *r;
	TAILQ_HEAD(, rttimer) rttlist;

	NET_ASSERT_LOCKED();

	/*
	 * Phase 1: unlink everything onto a local list while holding
	 * the mutex, so the callouts below can run without it.
	 */
	TAILQ_INIT(&rttlist);
	mtx_enter(&rttimer_mtx);
	while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
		TAILQ_INSERT_TAIL(&rttlist, r, rtt_next);
		KASSERT(rtq->rtq_count > 0);
		rtq->rtq_count--;
	}
	mtx_leave(&rttimer_mtx);

	/* Phase 2: fire the actions and free the timers. */
	while ((r = TAILQ_FIRST(&rttlist)) != NULL) {
		TAILQ_REMOVE(&rttlist, r, rtt_next);
		RTTIMER_CALLOUT(r);
		pool_put(&rttimer_pool, r);
	}
}
1437
1438 unsigned long
1439 rt_timer_queue_count(struct rttimer_queue *rtq)
1440 {
1441 return (rtq->rtq_count);
1442 }
1443
/*
 * Detach `r' from its route and its queue.  Returns `r' if the caller
 * now owns it and must free it, or NULL if the timeout has already
 * fired, in which case rt_timer_timer() will do the cleanup instead.
 * Must be called with rttimer_mtx held.
 */
static inline struct rttimer *
rt_timer_unlink(struct rttimer *r)
{
	MUTEX_ASSERT_LOCKED(&rttimer_mtx);

	/* Detach from the route; rt_timer_timer() checks rtt_rt == NULL. */
	LIST_REMOVE(r, rtt_link);
	r->rtt_rt = NULL;

	if (timeout_del(&r->rtt_timeout) == 0) {
		/* timeout fired, so rt_timer_timer will do the cleanup */
		return NULL;
	}

	TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
	KASSERT(r->rtt_queue->rtq_count > 0);
	r->rtt_queue->rtq_count--;
	return r;
}
1462
/*
 * Cancel and free all timers attached to `rt' without running their
 * actions.
 */
void
rt_timer_remove_all(struct rtentry *rt)
{
	struct rttimer *r;
	TAILQ_HEAD(, rttimer) rttlist;

	/* Collect unlinked timers locally; free them after the mutex. */
	TAILQ_INIT(&rttlist);
	mtx_enter(&rttimer_mtx);
	while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) {
		/* NULL means the timeout fired and will free itself. */
		r = rt_timer_unlink(r);
		if (r != NULL)
			TAILQ_INSERT_TAIL(&rttlist, r, rtt_next);
	}
	mtx_leave(&rttimer_mtx);

	while ((r = TAILQ_FIRST(&rttlist)) != NULL) {
		TAILQ_REMOVE(&rttlist, r, rtt_next);
		pool_put(&rttimer_pool, r);
	}
}
1483
/*
 * Return the earliest expiry time among all timers attached to `rt',
 * or 0 if the route has no timers.
 */
time_t
rt_timer_get_expire(const struct rtentry *rt)
{
	const struct rttimer *r;
	time_t expire = 0;

	mtx_enter(&rttimer_mtx);
	LIST_FOREACH(r, &rt->rt_timer, rtt_link) {
		if (expire == 0 || expire > r->rtt_expire)
			expire = r->rtt_expire;
	}
	mtx_leave(&rttimer_mtx);

	return expire;
}
1499
/*
 * Arm a timer for `rt' on `queue'; the queue's action will run on the
 * route after the queue's timeout.  At most one timer per (route,
 * queue) pair is kept: an existing one is replaced.
 */
int
rt_timer_add(struct rtentry *rt, struct rttimer_queue *queue, u_int rtableid)
{
	struct rttimer *r, *rnew;

	rnew = pool_get(&rttimer_pool, PR_NOWAIT | PR_ZERO);
	if (rnew == NULL)
		return (ENOBUFS);

	rnew->rtt_rt = rt;
	rnew->rtt_queue = queue;
	rnew->rtt_tableid = rtableid;
	rnew->rtt_expire = getuptime() + queue->rtq_timeout;
	timeout_set_proc(&rnew->rtt_timeout, rt_timer_timer, rnew);

	mtx_enter(&rttimer_mtx);
	/*
	 * If there's already a timer with this action, destroy it before
	 * we add a new one.
	 */
	LIST_FOREACH(r, &rt->rt_timer, rtt_link) {
		if (r->rtt_queue == queue) {
			/* r becomes NULL if the timeout already fired. */
			r = rt_timer_unlink(r);
			break;	/* only one per list, so we can quit... */
		}
	}

	LIST_INSERT_HEAD(&rt->rt_timer, rnew, rtt_link);
	TAILQ_INSERT_TAIL(&queue->rtq_head, rnew, rtt_next);
	timeout_add_sec(&rnew->rtt_timeout, queue->rtq_timeout);
	rnew->rtt_queue->rtq_count++;
	mtx_leave(&rttimer_mtx);

	/* Free the replaced timer, if any, outside the mutex. */
	if (r != NULL)
		pool_put(&rttimer_pool, r);

	return (0);
}
1538
/*
 * Timeout handler: detach the expired timer from its queue and route,
 * run its action (unless the route already dropped it), and free it.
 */
void
rt_timer_timer(void *arg)
{
	struct rttimer *r = arg;
	struct rttimer_queue *rtq = r->rtt_queue;

	NET_LOCK();
	mtx_enter(&rttimer_mtx);

	/* rtt_rt is NULL when rt_timer_unlink() already detached us. */
	if (r->rtt_rt != NULL)
		LIST_REMOVE(r, rtt_link);
	TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
	KASSERT(rtq->rtq_count > 0);
	rtq->rtq_count--;

	mtx_leave(&rttimer_mtx);

	/* Run the action outside the mutex but under the net lock. */
	if (r->rtt_rt != NULL)
		RTTIMER_CALLOUT(r);
	NET_UNLOCK();

	pool_put(&rttimer_pool, r);
}
1562
1563 #ifdef MPLS
/*
 * Attach MPLS information from `src' to `rt' with operation `op'.
 * Returns 0 on success or an errno after validating the sockaddr.
 */
int
rt_mpls_set(struct rtentry *rt, struct sockaddr *src, uint8_t op)
{
	struct sockaddr_mpls *psa_mpls = (struct sockaddr_mpls *)src;
	struct rt_mpls *rt_mpls;

	/* Only MPLS_OP_POP makes sense without a label to operate on. */
	if (psa_mpls == NULL && op != MPLS_OP_POP)
		return (EOPNOTSUPP);
	if (psa_mpls != NULL && psa_mpls->smpls_len != sizeof(*psa_mpls))
		return (EINVAL);
	if (psa_mpls != NULL && psa_mpls->smpls_family != AF_MPLS)
		return (EAFNOSUPPORT);

	/*
	 * NOTE(review): a previously attached rt_llinfo would be leaked
	 * here; callers presumably clear it first (rt_mpls_clear) —
	 * confirm against the call sites.
	 */
	rt->rt_llinfo = malloc(sizeof(struct rt_mpls), M_TEMP, M_NOWAIT|M_ZERO);
	if (rt->rt_llinfo == NULL)
		return (ENOMEM);

	rt_mpls = (struct rt_mpls *)rt->rt_llinfo;
	if (psa_mpls != NULL)
		rt_mpls->mpls_label = psa_mpls->smpls_label;
	rt_mpls->mpls_operation = op;
	/* XXX: set experimental bits */
	rt->rt_flags |= RTF_MPLS;

	return (0);
}
1590
1591 void
1592 rt_mpls_clear(struct rtentry *rt)
1593 {
1594 if (rt->rt_llinfo != NULL && rt->rt_flags & RTF_MPLS) {
1595 free(rt->rt_llinfo, M_TEMP, sizeof(struct rt_mpls));
1596 rt->rt_llinfo = NULL;
1597 }
1598 rt->rt_flags &= ~RTF_MPLS;
1599 }
1600 #endif
1601
/*
 * Return the id of route label `name', taking a reference; a new id
 * and label entry are allocated if the name is not yet known.  Returns
 * 0 for the empty name, on id exhaustion, or allocation failure.
 */
u_int16_t
rtlabel_name2id(char *name)
{
	struct rt_label *label, *p;
	u_int16_t new_id = 1;

	if (!name[0])
		return (0);

	TAILQ_FOREACH(label, &rt_labels, rtl_entry)
		if (strcmp(name, label->rtl_name) == 0) {
			label->rtl_ref++;
			return (label->rtl_id);
		}

	/*
	 * to avoid fragmentation, we do a linear search from the beginning
	 * and take the first free slot we find. if there is none or the list
	 * is empty, append a new entry at the end.
	 */
	TAILQ_FOREACH(p, &rt_labels, rtl_entry) {
		if (p->rtl_id != new_id)
			break;	/* gap in the id sequence before p */
		new_id = p->rtl_id + 1;
	}
	if (new_id > LABELID_MAX)
		return (0);

	label = malloc(sizeof(*label), M_RTABLE, M_NOWAIT|M_ZERO);
	if (label == NULL)
		return (0);
	strlcpy(label->rtl_name, name, sizeof(label->rtl_name));
	label->rtl_id = new_id;
	label->rtl_ref++;

	if (p != NULL)	/* insert new entry before p */
		TAILQ_INSERT_BEFORE(p, label, rtl_entry);
	else		/* either list empty or no free slot in between */
		TAILQ_INSERT_TAIL(&rt_labels, label, rtl_entry);

	return (label->rtl_id);
}
1644
1645 const char *
1646 rtlabel_id2name(u_int16_t id)
1647 {
1648 struct rt_label *label;
1649
1650 TAILQ_FOREACH(label, &rt_labels, rtl_entry)
1651 if (label->rtl_id == id)
1652 return (label->rtl_name);
1653
1654 return (NULL);
1655 }
1656
1657 struct sockaddr *
1658 rtlabel_id2sa(u_int16_t labelid, struct sockaddr_rtlabel *sa_rl)
1659 {
1660 const char *label;
1661
1662 if (labelid == 0 || (label = rtlabel_id2name(labelid)) == NULL)
1663 return (NULL);
1664
1665 bzero(sa_rl, sizeof(*sa_rl));
1666 sa_rl->sr_len = sizeof(*sa_rl);
1667 sa_rl->sr_family = AF_UNSPEC;
1668 strlcpy(sa_rl->sr_label, label, sizeof(sa_rl->sr_label));
1669
1670 return ((struct sockaddr *)sa_rl);
1671 }
1672
1673 void
1674 rtlabel_unref(u_int16_t id)
1675 {
1676 struct rt_label *p, *next;
1677
1678 if (id == 0)
1679 return;
1680
1681 TAILQ_FOREACH_SAFE(p, &rt_labels, rtl_entry, next) {
1682 if (id == p->rtl_id) {
1683 if (--p->rtl_ref == 0) {
1684 TAILQ_REMOVE(&rt_labels, p, rtl_entry);
1685 free(p, M_RTABLE, sizeof(*p));
1686 }
1687 break;
1688 }
1689 }
1690 }
1691
/*
 * Walk every rtable in the rdomain of `ifp' and update or delete
 * routes in response to a link-state change on the interface.
 */
int
rt_if_track(struct ifnet *ifp)
{
	unsigned int rtableid;
	struct rtentry *rt = NULL;
	int i, error = 0;

	for (rtableid = 0; rtableid < rtmap_limit; rtableid++) {
		/* skip rtables that are not in the rdomain of the ifp */
		if (rtable_l2(rtableid) != ifp->if_rdomain)
			continue;
		for (i = 1; i <= AF_MAX; i++) {
			if (!rtable_mpath_capable(rtableid, i))
				continue;

			do {
				/*
				 * The walker returns EEXIST for routes
				 * it wants deleted; delete them here and
				 * restart the walk (EAGAIN).
				 */
				error = rtable_walk(rtableid, i, &rt,
				    rt_if_linkstate_change, ifp);
				if (rt != NULL && error == EEXIST) {
					error = rtdeletemsg(rt, ifp, rtableid);
					if (error == 0)
						error = EAGAIN;
				}
				rtfree(rt);
				rt = NULL;
			} while (error == EAGAIN);

			/* An unsupported family is not an error. */
			if (error == EAFNOSUPPORT)
				error = 0;

			if (error)
				break;
		}
	}

	return (error);
}
1729
/*
 * rtable_walk() callback: bring routes bound to `ifp' up or down so
 * they match the interface's link state.  Returning EEXIST asks the
 * caller (rt_if_track) to delete the route.
 */
int
rt_if_linkstate_change(struct rtentry *rt, void *arg, u_int id)
{
	struct ifnet *ifp = arg;
	struct sockaddr_in6 sa_mask;
	int error;

	if (rt->rt_ifidx != ifp->if_index)
		return (0);

	/* Local routes are always usable. */
	if (rt->rt_flags & RTF_LOCAL) {
		rt->rt_flags |= RTF_UP;
		return (0);
	}

	if (LINK_STATE_IS_UP(ifp->if_link_state) && ifp->if_flags & IFF_UP) {
		if (ISSET(rt->rt_flags, RTF_UP))
			return (0);

		/* bring route up */
		rt->rt_flags |= RTF_UP;
		error = rtable_mpath_reprio(id, rt_key(rt), rt_plen(rt),
		    rt->rt_priority & RTP_MASK, rt);
	} else {
		/*
		 * Remove redirected and cloned routes (mainly ARP)
		 * from down interfaces so we have a chance to get
		 * new routes from a better source.
		 */
		if (ISSET(rt->rt_flags, RTF_CLONED|RTF_DYNAMIC) &&
		    !ISSET(rt->rt_flags, RTF_CACHED|RTF_BFD)) {
			return (EEXIST);
		}

		if (!ISSET(rt->rt_flags, RTF_UP))
			return (0);

		/* take route down */
		rt->rt_flags &= ~RTF_UP;
		error = rtable_mpath_reprio(id, rt_key(rt), rt_plen(rt),
		    rt->rt_priority | RTP_DOWN, rt);
	}
	/* Let interface groups react to the reachability change. */
	if_group_routechange(rt_key(rt), rt_plen2mask(rt, &sa_mask));

	return (error);
}
1777
/*
 * Build a netmask sockaddr for address family `af' and prefix length
 * `plen' in the caller-supplied buffer `sa_mask'.  Returns NULL for
 * plen == -1 (no mask) or an unsupported family.
 */
struct sockaddr *
rt_plentosa(sa_family_t af, int plen, struct sockaddr_in6 *sa_mask)
{
	/* sockaddr_in6 is big enough to hold either family's mask. */
	struct sockaddr_in *sin = (struct sockaddr_in *)sa_mask;
#ifdef INET6
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa_mask;
#endif

	KASSERT(plen >= 0 || plen == -1);

	if (plen == -1)
		return (NULL);

	memset(sa_mask, 0, sizeof(*sa_mask));

	switch (af) {
	case AF_INET:
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(struct sockaddr_in);
		in_prefixlen2mask(&sin->sin_addr, plen);
		break;
#ifdef INET6
	case AF_INET6:
		sin6->sin6_family = AF_INET6;
		sin6->sin6_len = sizeof(struct sockaddr_in6);
		in6_prefixlen2mask(&sin6->sin6_addr, plen);
		break;
#endif /* INET6 */
	default:
		return (NULL);
	}

	return ((struct sockaddr *)sa_mask);
}
1812
/*
 * Convenience wrapper: return the netmask sockaddr corresponding to
 * `rt's prefix length, built in the caller-supplied `sa_mask'.
 */
struct sockaddr *
rt_plen2mask(struct rtentry *rt, struct sockaddr_in6 *sa_mask)
{
	return (rt_plentosa(rt_key(rt)->sa_family, rt_plen(rt), sa_mask));
}
1818
1819 #ifdef DDB
1820 #include <machine/db_machdep.h>
1821 #include <ddb/db_output.h>
1822
1823 void db_print_sa(struct sockaddr *);
1824 void db_print_ifa(struct ifaddr *);
1825
1826 void
1827 db_print_sa(struct sockaddr *sa)
1828 {
1829 int len;
1830 u_char *p;
1831
1832 if (sa == NULL) {
1833 db_printf("[NULL]");
1834 return;
1835 }
1836
1837 p = (u_char *)sa;
1838 len = sa->sa_len;
1839 db_printf("[");
1840 while (len > 0) {
1841 db_printf("%d", *p);
1842 p++;
1843 len--;
1844 if (len)
1845 db_printf(",");
1846 }
1847 db_printf("]\n");
1848 }
1849
/*
 * Dump an ifaddr and its sockaddrs to the ddb console; no-op for NULL.
 */
void
db_print_ifa(struct ifaddr *ifa)
{
	if (ifa == NULL)
		return;
	db_printf(" ifa_addr=");
	db_print_sa(ifa->ifa_addr);
	db_printf(" ifa_dsta=");
	db_print_sa(ifa->ifa_dstaddr);
	db_printf(" ifa_mask=");
	db_print_sa(ifa->ifa_netmask);
	db_printf(" flags=0x%x, refcnt=%u, metric=%d\n",
	    ifa->ifa_flags, ifa->ifa_refcnt.r_refs, ifa->ifa_metric);
}
1864
1865 /*
1866 * Function to pass to rtable_walk().
1867 * Return non-zero error to abort walk.
1868 */
/*
 * Function to pass to rtable_walk().
 * Return non-zero error to abort walk.
 */
int
db_show_rtentry(struct rtentry *rt, void *w, unsigned int id)
{
	db_printf("rtentry=%p", rt);

	db_printf(" flags=0x%x refcnt=%u use=%llu expire=%lld\n",
	    rt->rt_flags, rt->rt_refcnt.r_refs, rt->rt_use, rt->rt_expire);

	db_printf(" key="); db_print_sa(rt_key(rt));
	db_printf(" plen=%d", rt_plen(rt));
	db_printf(" gw="); db_print_sa(rt->rt_gateway);
	db_printf(" ifidx=%u ", rt->rt_ifidx);
	db_printf(" ifa=%p\n", rt->rt_ifa);
	db_print_ifa(rt->rt_ifa);

	db_printf(" gwroute=%p llinfo=%p priority=%d\n",
	    rt->rt_gwroute, rt->rt_llinfo, rt->rt_priority);
	/* Always keep walking; this callback never aborts the walk. */
	return (0);
}
1888
1889 /*
1890 * Function to print all the route trees.
1891 */
/*
 * Function to print all the route trees.
 * Walk errors are ignored; this is only a debugger aid.
 */
int
db_show_rtable(int af, unsigned int rtableid)
{
	db_printf("Route tree for af %d, rtableid %u\n", af, rtableid);
	rtable_walk(rtableid, af, NULL, db_show_rtentry, NULL);
	return (0);
}
1899 #endif /* DDB */