FreeBSD/Linux Kernel Cross Reference
sys/net/rtsock.c
1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1988, 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 * @(#)rtsock.c 8.7 (Berkeley) 10/12/95
32 * $FreeBSD$
33 */
34 #include "opt_ddb.h"
35 #include "opt_route.h"
36 #include "opt_inet.h"
37 #include "opt_inet6.h"
38
39 #include <sys/param.h>
40 #include <sys/jail.h>
41 #include <sys/kernel.h>
42 #include <sys/eventhandler.h>
43 #include <sys/domain.h>
44 #include <sys/lock.h>
45 #include <sys/malloc.h>
46 #include <sys/mbuf.h>
47 #include <sys/priv.h>
48 #include <sys/proc.h>
49 #include <sys/protosw.h>
50 #include <sys/rmlock.h>
51 #include <sys/rwlock.h>
52 #include <sys/signalvar.h>
53 #include <sys/socket.h>
54 #include <sys/socketvar.h>
55 #include <sys/sysctl.h>
56 #include <sys/systm.h>
57
58 #include <net/if.h>
59 #include <net/if_var.h>
60 #include <net/if_private.h>
61 #include <net/if_dl.h>
62 #include <net/if_llatbl.h>
63 #include <net/if_types.h>
64 #include <net/netisr.h>
65 #include <net/route.h>
66 #include <net/route/route_ctl.h>
67 #include <net/route/route_var.h>
68 #include <net/vnet.h>
69
70 #include <netinet/in.h>
71 #include <netinet/if_ether.h>
72 #include <netinet/ip_carp.h>
73 #ifdef INET6
74 #include <netinet6/in6_var.h>
75 #include <netinet6/ip6_var.h>
76 #include <netinet6/scope6_var.h>
77 #endif
78 #include <net/route/nhop.h>
79
80 #define DEBUG_MOD_NAME rtsock
81 #define DEBUG_MAX_LEVEL LOG_DEBUG
82 #include <net/route/route_debug.h>
83 _DECLARE_DEBUG(LOG_INFO);
84
85 #ifdef COMPAT_FREEBSD32
86 #include <sys/mount.h>
87 #include <compat/freebsd32/freebsd32.h>
88
89 struct if_msghdr32 {
90 uint16_t ifm_msglen;
91 uint8_t ifm_version;
92 uint8_t ifm_type;
93 int32_t ifm_addrs;
94 int32_t ifm_flags;
95 uint16_t ifm_index;
96 uint16_t _ifm_spare1;
97 struct if_data ifm_data;
98 };
99
100 struct if_msghdrl32 {
101 uint16_t ifm_msglen;
102 uint8_t ifm_version;
103 uint8_t ifm_type;
104 int32_t ifm_addrs;
105 int32_t ifm_flags;
106 uint16_t ifm_index;
107 uint16_t _ifm_spare1;
108 uint16_t ifm_len;
109 uint16_t ifm_data_off;
110 uint32_t _ifm_spare2;
111 struct if_data ifm_data;
112 };
113
114 struct ifa_msghdrl32 {
115 uint16_t ifam_msglen;
116 uint8_t ifam_version;
117 uint8_t ifam_type;
118 int32_t ifam_addrs;
119 int32_t ifam_flags;
120 uint16_t ifam_index;
121 uint16_t _ifam_spare1;
122 uint16_t ifam_len;
123 uint16_t ifam_data_off;
124 int32_t ifam_metric;
125 struct if_data ifam_data;
126 };
127
/*
 * Storage size of a sockaddr for the COMPAT_FREEBSD32 code: sa_len
 * rounded up to a multiple of sizeof(int), or sizeof(int) when sa_len
 * is zero.  Note that (sa) is evaluated more than once.
 */
#define	SA_SIZE32(sa)							\
	((((struct sockaddr *)(sa))->sa_len != 0) ?			\
	    1 + ((((struct sockaddr *)(sa))->sa_len - 1) |		\
		(sizeof(int) - 1)) :					\
	    sizeof(int))
132
133 #endif /* COMPAT_FREEBSD32 */
134
/*
 * Simple bump allocator state: allocations are carved out of
 * [base, base + size) starting at the current offset.
 */
struct linear_buffer {
	char		*base;		/* Base allocated memory pointer */
	uint32_t	offset;		/* Currently used offset */
	uint32_t	size;		/* Total buffer size */
};
140 #define SCRATCH_BUFFER_SIZE 1024
141
142 #define RTS_PID_LOG(_l, _fmt, ...) RT_LOG_##_l(_l, "PID %d: " _fmt, curproc ? curproc->p_pid : 0, ## __VA_ARGS__)
143
144 MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
145
146 /* NB: these are not modified */
147 static struct sockaddr route_src = { 2, PF_ROUTE, };
148 static struct sockaddr sa_zero = { sizeof(sa_zero), AF_INET, };
149
150 /* These are external hooks for CARP. */
151 int (*carp_get_vhid_p)(struct ifaddr *);
152
153 /*
154 * Used by rtsock callback code to decide whether to filter the update
155 * notification to a socket bound to a particular FIB.
156 */
157 #define RTS_FILTER_FIB M_PROTO8
158 /*
159 * Used to store address family of the notification.
160 */
161 #define m_rtsock_family m_pkthdr.PH_loc.eight[0]
162
/*
 * Routing control block: one per attached routing socket.  Created in
 * rts_attach() and released in rts_detach().
 */
struct rcb {
	LIST_ENTRY(rcb)	list;		/* linkage on V_route_cb.cblist */
	struct socket	*rcb_socket;	/* owning socket */
	sa_family_t	rcb_family;	/* protocol given at attach time */
};
168
169 typedef struct {
170 LIST_HEAD(, rcb) cblist;
171 int ip_count; /* attached w/ AF_INET */
172 int ip6_count; /* attached w/ AF_INET6 */
173 int any_count; /* total attached */
174 } route_cb_t;
175 VNET_DEFINE_STATIC(route_cb_t, route_cb);
176 #define V_route_cb VNET(route_cb)
177
178 struct mtx rtsock_mtx;
179 MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF);
180
181 #define RTSOCK_LOCK() mtx_lock(&rtsock_mtx)
182 #define RTSOCK_UNLOCK() mtx_unlock(&rtsock_mtx)
183 #define RTSOCK_LOCK_ASSERT() mtx_assert(&rtsock_mtx, MA_OWNED)
184
185 SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
186
187 struct walkarg {
188 int family;
189 int w_tmemsize;
190 int w_op, w_arg;
191 caddr_t w_tmem;
192 struct sysctl_req *w_req;
193 struct sockaddr *dst;
194 struct sockaddr *mask;
195 };
196
197 static void rts_input(struct mbuf *m);
198 static struct mbuf *rtsock_msg_mbuf(int type, struct rt_addrinfo *rtinfo);
199 static int rtsock_msg_buffer(int type, struct rt_addrinfo *rtinfo,
200 struct walkarg *w, int *plen);
201 static int rt_xaddrs(caddr_t cp, caddr_t cplim,
202 struct rt_addrinfo *rtinfo);
203 static int cleanup_xaddrs(struct rt_addrinfo *info, struct linear_buffer *lb);
204 static int sysctl_dumpentry(struct rtentry *rt, void *vw);
205 static int sysctl_dumpnhop(struct rtentry *rt, struct nhop_object *nh,
206 uint32_t weight, struct walkarg *w);
207 static int sysctl_iflist(int af, struct walkarg *w);
208 static int sysctl_ifmalist(int af, struct walkarg *w);
209 static void rt_getmetrics(const struct rtentry *rt,
210 const struct nhop_object *nh, struct rt_metrics *out);
211 static void rt_dispatch(struct mbuf *, sa_family_t);
212 static void rt_ifannouncemsg(struct ifnet *ifp, int what);
213 static int handle_rtm_get(struct rt_addrinfo *info, u_int fibnum,
214 struct rt_msghdr *rtm, struct rib_cmd_info *rc);
215 static int update_rtm_from_rc(struct rt_addrinfo *info,
216 struct rt_msghdr **prtm, int alloc_len,
217 struct rib_cmd_info *rc, struct nhop_object *nh);
218 static void send_rtm_reply(struct socket *so, struct rt_msghdr *rtm,
219 struct mbuf *m, sa_family_t saf, u_int fibnum,
220 int rtm_errno);
221 static bool can_export_rte(struct ucred *td_ucred, bool rt_is_host,
222 const struct sockaddr *rt_dst);
223 static void rtsock_notify_event(uint32_t fibnum, const struct rib_cmd_info *rc);
224 static void rtsock_ifmsg(struct ifnet *ifp, int if_flags_mask);
225
226 static struct netisr_handler rtsock_nh = {
227 .nh_name = "rtsock",
228 .nh_handler = rts_input,
229 .nh_proto = NETISR_ROUTE,
230 .nh_policy = NETISR_POLICY_SOURCE,
231 };
232
233 static int
234 sysctl_route_netisr_maxqlen(SYSCTL_HANDLER_ARGS)
235 {
236 int error, qlimit;
237
238 netisr_getqlimit(&rtsock_nh, &qlimit);
239 error = sysctl_handle_int(oidp, &qlimit, 0, req);
240 if (error || !req->newptr)
241 return (error);
242 if (qlimit < 1)
243 return (EINVAL);
244 return (netisr_setqlimit(&rtsock_nh, qlimit));
245 }
246 SYSCTL_PROC(_net_route, OID_AUTO, netisr_maxqlen,
247 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
248 0, 0, sysctl_route_netisr_maxqlen, "I",
249 "maximum routing socket dispatch queue length");
250
251 static void
252 vnet_rts_init(void)
253 {
254 int tmp;
255
256 if (IS_DEFAULT_VNET(curvnet)) {
257 if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp))
258 rtsock_nh.nh_qlimit = tmp;
259 netisr_register(&rtsock_nh);
260 }
261 #ifdef VIMAGE
262 else
263 netisr_register_vnet(&rtsock_nh);
264 #endif
265 }
266 VNET_SYSINIT(vnet_rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
267 vnet_rts_init, 0);
268
269 #ifdef VIMAGE
270 static void
271 vnet_rts_uninit(void)
272 {
273
274 netisr_unregister_vnet(&rtsock_nh);
275 }
276 VNET_SYSUNINIT(vnet_rts_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
277 vnet_rts_uninit, 0);
278 #endif
279
280 static void
281 report_route_event(const struct rib_cmd_info *rc, void *_cbdata)
282 {
283 uint32_t fibnum = (uint32_t)(uintptr_t)_cbdata;
284 struct nhop_object *nh;
285
286 nh = rc->rc_cmd == RTM_DELETE ? rc->rc_nh_old : rc->rc_nh_new;
287 rt_routemsg(rc->rc_cmd, rc->rc_rt, nh, fibnum);
288 }
289
/*
 * Route-change callback installed through rtsbridge.
 *
 * With ROUTE_MPATH, a change whose old or new nexthop is a nexthop
 * group is decomposed so that report_route_event() runs once per
 * individual notification; everything else is reported directly.
 */
static void
rts_handle_route_event(uint32_t fibnum, const struct rib_cmd_info *rc)
{
	void *cbdata = (void *)(uintptr_t)fibnum;

#ifdef ROUTE_MPATH
	if ((rc->rc_nh_new != NULL && NH_IS_NHGRP(rc->rc_nh_new)) ||
	    (rc->rc_nh_old != NULL && NH_IS_NHGRP(rc->rc_nh_old))) {
		rib_decompose_notification(rc, report_route_event, cbdata);
		return;
	}
#endif
	report_route_event(rc, cbdata);
}
302 static struct rtbridge rtsbridge = {
303 .route_f = rts_handle_route_event,
304 .ifmsg_f = rtsock_ifmsg,
305 };
306 static struct rtbridge *rtsbridge_orig_p;
307
308 static void
309 rtsock_notify_event(uint32_t fibnum, const struct rib_cmd_info *rc)
310 {
311 netlink_callback_p->route_f(fibnum, rc);
312 }
313
314 static void
315 rtsock_init(void)
316 {
317 rtsbridge_orig_p = rtsock_callback_p;
318 rtsock_callback_p = &rtsbridge;
319 }
320 SYSINIT(rtsock_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rtsock_init, NULL);
321
322 static void
323 rts_handle_ifnet_arrival(void *arg __unused, struct ifnet *ifp)
324 {
325 rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
326 }
327 EVENTHANDLER_DEFINE(ifnet_arrival_event, rts_handle_ifnet_arrival, NULL, 0);
328
329 static void
330 rts_handle_ifnet_departure(void *arg __unused, struct ifnet *ifp)
331 {
332 rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
333 }
334 EVENTHANDLER_DEFINE(ifnet_departure_event, rts_handle_ifnet_departure, NULL, 0);
335
336 static void
337 rts_append_data(struct socket *so, struct mbuf *m)
338 {
339
340 if (sbappendaddr(&so->so_rcv, &route_src, m, NULL) == 0) {
341 soroverflow(so);
342 m_freem(m);
343 } else
344 sorwakeup(so);
345 }
346
347 static void
348 rts_input(struct mbuf *m)
349 {
350 struct rcb *rcb;
351 struct socket *last;
352
353 last = NULL;
354 RTSOCK_LOCK();
355 LIST_FOREACH(rcb, &V_route_cb.cblist, list) {
356 if (rcb->rcb_family != AF_UNSPEC &&
357 rcb->rcb_family != m->m_rtsock_family)
358 continue;
359 if ((m->m_flags & RTS_FILTER_FIB) &&
360 M_GETFIB(m) != rcb->rcb_socket->so_fibnum)
361 continue;
362 if (last != NULL) {
363 struct mbuf *n;
364
365 n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
366 if (n != NULL)
367 rts_append_data(last, n);
368 }
369 last = rcb->rcb_socket;
370 }
371 if (last != NULL)
372 rts_append_data(last, m);
373 else
374 m_freem(m);
375 RTSOCK_UNLOCK();
376 }
377
/* Close handler: mark the routing socket as disconnected. */
static void
rts_close(struct socket *so)
{
	soisdisconnected(so);
}
384
385 static SYSCTL_NODE(_net, OID_AUTO, rtsock, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
386 "Routing socket infrastructure");
387 static u_long rts_sendspace = 8192;
388 SYSCTL_ULONG(_net_rtsock, OID_AUTO, sendspace, CTLFLAG_RW, &rts_sendspace, 0,
389 "Default routing socket send space");
390 static u_long rts_recvspace = 8192;
391 SYSCTL_ULONG(_net_rtsock, OID_AUTO, recvspace, CTLFLAG_RW, &rts_recvspace, 0,
392 "Default routing socket receive space");
393
394 static int
395 rts_attach(struct socket *so, int proto, struct thread *td)
396 {
397 struct rcb *rcb;
398 int error;
399
400 error = soreserve(so, rts_sendspace, rts_recvspace);
401 if (error)
402 return (error);
403
404 rcb = malloc(sizeof(*rcb), M_PCB, M_WAITOK);
405 rcb->rcb_socket = so;
406 rcb->rcb_family = proto;
407
408 so->so_pcb = rcb;
409 so->so_fibnum = td->td_proc->p_fibnum;
410 so->so_options |= SO_USELOOPBACK;
411
412 RTSOCK_LOCK();
413 LIST_INSERT_HEAD(&V_route_cb.cblist, rcb, list);
414 switch (proto) {
415 case AF_INET:
416 V_route_cb.ip_count++;
417 break;
418 case AF_INET6:
419 V_route_cb.ip6_count++;
420 break;
421 }
422 V_route_cb.any_count++;
423 RTSOCK_UNLOCK();
424 soisconnected(so);
425
426 return (0);
427 }
428
429 static void
430 rts_detach(struct socket *so)
431 {
432 struct rcb *rcb = so->so_pcb;
433
434 RTSOCK_LOCK();
435 LIST_REMOVE(rcb, list);
436 switch(rcb->rcb_family) {
437 case AF_INET:
438 V_route_cb.ip_count--;
439 break;
440 case AF_INET6:
441 V_route_cb.ip6_count--;
442 break;
443 }
444 V_route_cb.any_count--;
445 RTSOCK_UNLOCK();
446 free(rcb, M_PCB);
447 so->so_pcb = NULL;
448 }
449
/* Disconnect handler: always fails with ENOTCONN. */
static int
rts_disconnect(struct socket *so)
{
	return (ENOTCONN);
}
456
/* Shutdown handler: disallow further sends on the socket; returns 0. */
static int
rts_shutdown(struct socket *so)
{
	socantsendmore(so);
	return (0);
}
464
#ifndef _SOCKADDR_UNION_DEFINED
#define	_SOCKADDR_UNION_DEFINED
/*
 * Storage large enough for any address format handled here (generic,
 * IPv4 or IPv6), accessible through the matching typed view.
 */
union sockaddr_union {
	struct sockaddr		sa;
	struct sockaddr_in	sin;
	struct sockaddr_in6	sin6;
};
#endif /* _SOCKADDR_UNION_DEFINED */
476
477 static int
478 rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
479 struct nhop_object *nh, union sockaddr_union *saun, struct ucred *cred)
480 {
481 #if defined(INET) || defined(INET6)
482 struct epoch_tracker et;
483 #endif
484
485 /* First, see if the returned address is part of the jail. */
486 if (prison_if(cred, nh->nh_ifa->ifa_addr) == 0) {
487 info->rti_info[RTAX_IFA] = nh->nh_ifa->ifa_addr;
488 return (0);
489 }
490
491 switch (info->rti_info[RTAX_DST]->sa_family) {
492 #ifdef INET
493 case AF_INET:
494 {
495 struct in_addr ia;
496 struct ifaddr *ifa;
497 int found;
498
499 found = 0;
500 /*
501 * Try to find an address on the given outgoing interface
502 * that belongs to the jail.
503 */
504 NET_EPOCH_ENTER(et);
505 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
506 struct sockaddr *sa;
507 sa = ifa->ifa_addr;
508 if (sa->sa_family != AF_INET)
509 continue;
510 ia = ((struct sockaddr_in *)sa)->sin_addr;
511 if (prison_check_ip4(cred, &ia) == 0) {
512 found = 1;
513 break;
514 }
515 }
516 NET_EPOCH_EXIT(et);
517 if (!found) {
518 /*
519 * As a last resort return the 'default' jail address.
520 */
521 ia = ((struct sockaddr_in *)nh->nh_ifa->ifa_addr)->
522 sin_addr;
523 if (prison_get_ip4(cred, &ia) != 0)
524 return (ESRCH);
525 }
526 bzero(&saun->sin, sizeof(struct sockaddr_in));
527 saun->sin.sin_len = sizeof(struct sockaddr_in);
528 saun->sin.sin_family = AF_INET;
529 saun->sin.sin_addr.s_addr = ia.s_addr;
530 info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin;
531 break;
532 }
533 #endif
534 #ifdef INET6
535 case AF_INET6:
536 {
537 struct in6_addr ia6;
538 struct ifaddr *ifa;
539 int found;
540
541 found = 0;
542 /*
543 * Try to find an address on the given outgoing interface
544 * that belongs to the jail.
545 */
546 NET_EPOCH_ENTER(et);
547 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
548 struct sockaddr *sa;
549 sa = ifa->ifa_addr;
550 if (sa->sa_family != AF_INET6)
551 continue;
552 bcopy(&((struct sockaddr_in6 *)sa)->sin6_addr,
553 &ia6, sizeof(struct in6_addr));
554 if (prison_check_ip6(cred, &ia6) == 0) {
555 found = 1;
556 break;
557 }
558 }
559 NET_EPOCH_EXIT(et);
560 if (!found) {
561 /*
562 * As a last resort return the 'default' jail address.
563 */
564 ia6 = ((struct sockaddr_in6 *)nh->nh_ifa->ifa_addr)->
565 sin6_addr;
566 if (prison_get_ip6(cred, &ia6) != 0)
567 return (ESRCH);
568 }
569 bzero(&saun->sin6, sizeof(struct sockaddr_in6));
570 saun->sin6.sin6_len = sizeof(struct sockaddr_in6);
571 saun->sin6.sin6_family = AF_INET6;
572 bcopy(&ia6, &saun->sin6.sin6_addr, sizeof(struct in6_addr));
573 if (sa6_recoverscope(&saun->sin6) != 0)
574 return (ESRCH);
575 info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin6;
576 break;
577 }
578 #endif
579 default:
580 return (ESRCH);
581 }
582 return (0);
583 }
584
585 static int
586 fill_blackholeinfo(struct rt_addrinfo *info, union sockaddr_union *saun)
587 {
588 struct ifaddr *ifa;
589 sa_family_t saf;
590
591 if (V_loif == NULL) {
592 RTS_PID_LOG(LOG_INFO, "Unable to add blackhole/reject nhop without loopback");
593 return (ENOTSUP);
594 }
595 info->rti_ifp = V_loif;
596
597 saf = info->rti_info[RTAX_DST]->sa_family;
598
599 CK_STAILQ_FOREACH(ifa, &info->rti_ifp->if_addrhead, ifa_link) {
600 if (ifa->ifa_addr->sa_family == saf) {
601 info->rti_ifa = ifa;
602 break;
603 }
604 }
605 if (info->rti_ifa == NULL) {
606 RTS_PID_LOG(LOG_INFO, "Unable to find ifa for blackhole/reject nhop");
607 return (ENOTSUP);
608 }
609
610 bzero(saun, sizeof(union sockaddr_union));
611 switch (saf) {
612 #ifdef INET
613 case AF_INET:
614 saun->sin.sin_family = AF_INET;
615 saun->sin.sin_len = sizeof(struct sockaddr_in);
616 saun->sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
617 break;
618 #endif
619 #ifdef INET6
620 case AF_INET6:
621 saun->sin6.sin6_family = AF_INET6;
622 saun->sin6.sin6_len = sizeof(struct sockaddr_in6);
623 saun->sin6.sin6_addr = in6addr_loopback;
624 break;
625 #endif
626 default:
627 RTS_PID_LOG(LOG_INFO, "unsupported family: %d", saf);
628 return (ENOTSUP);
629 }
630 info->rti_info[RTAX_GATEWAY] = &saun->sa;
631 info->rti_flags |= RTF_GATEWAY;
632
633 return (0);
634 }
635
636 /*
637 * Fills in @info based on userland-provided @rtm message.
638 *
639 * Returns 0 on success.
640 */
641 static int
642 fill_addrinfo(struct rt_msghdr *rtm, int len, struct linear_buffer *lb, u_int fibnum,
643 struct rt_addrinfo *info)
644 {
645 int error;
646
647 rtm->rtm_pid = curproc->p_pid;
648 info->rti_addrs = rtm->rtm_addrs;
649
650 info->rti_mflags = rtm->rtm_inits;
651 info->rti_rmx = &rtm->rtm_rmx;
652
653 /*
654 * rt_xaddrs() performs s6_addr[2] := sin6_scope_id for AF_INET6
655 * link-local address because rtrequest requires addresses with
656 * embedded scope id.
657 */
658 if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, info))
659 return (EINVAL);
660
661 info->rti_flags = rtm->rtm_flags;
662 error = cleanup_xaddrs(info, lb);
663 if (error != 0)
664 return (error);
665 /*
666 * Verify that the caller has the appropriate privilege; RTM_GET
667 * is the only operation the non-superuser is allowed.
668 */
669 if (rtm->rtm_type != RTM_GET) {
670 error = priv_check(curthread, PRIV_NET_ROUTE);
671 if (error != 0)
672 return (error);
673 }
674
675 /*
676 * The given gateway address may be an interface address.
677 * For example, issuing a "route change" command on a route
678 * entry that was created from a tunnel, and the gateway
679 * address given is the local end point. In this case the
680 * RTF_GATEWAY flag must be cleared or the destination will
681 * not be reachable even though there is no error message.
682 */
683 if (info->rti_info[RTAX_GATEWAY] != NULL &&
684 info->rti_info[RTAX_GATEWAY]->sa_family != AF_LINK) {
685 struct nhop_object *nh;
686
687 /*
688 * A host route through the loopback interface is
689 * installed for each interface adddress. In pre 8.0
690 * releases the interface address of a PPP link type
691 * is not reachable locally. This behavior is fixed as
692 * part of the new L2/L3 redesign and rewrite work. The
693 * signature of this interface address route is the
694 * AF_LINK sa_family type of the gateway, and the
695 * rt_ifp has the IFF_LOOPBACK flag set.
696 */
697 nh = rib_lookup(fibnum, info->rti_info[RTAX_GATEWAY], NHR_NONE, 0);
698 if (nh != NULL && nh->gw_sa.sa_family == AF_LINK &&
699 nh->nh_ifp->if_flags & IFF_LOOPBACK) {
700 info->rti_flags &= ~RTF_GATEWAY;
701 info->rti_flags |= RTF_GWFLAG_COMPAT;
702 }
703 }
704
705 return (0);
706 }
707
/*
 * Resolve @nh to a single nexthop.
 *
 * A plain nexthop is returned unchanged.  For a nexthop group (only
 * with ROUTE_MPATH) the first member is returned when @gw is NULL,
 * otherwise the first member whose gateway matches @gw.
 *
 * Returns NULL when no group member matches, or when @nh is a group
 * and ROUTE_MPATH is not compiled in.
 */
static struct nhop_object *
select_nhop(struct nhop_object *nh, const struct sockaddr *gw)
{
	if (!NH_IS_NHGRP(nh))
		return (nh);
#ifdef ROUTE_MPATH
	const struct weightened_nhop *wn;
	uint32_t num_nhops;

	wn = nhgrp_get_nhops((struct nhgrp_object *)nh, &num_nhops);
	if (gw == NULL)
		return (wn[0].nh);
	/* Index type matches num_nhops to avoid a signed/unsigned compare. */
	for (uint32_t i = 0; i < num_nhops; i++) {
		if (match_nhop_gw(wn[i].nh, gw))
			return (wn[i].nh);
	}
#endif
	return (NULL);
}
726
727 /*
728 * Handles RTM_GET message from routing socket, returning matching rt.
729 *
730 * Returns:
731 * 0 on success, with locked and referenced matching rt in @rt_nrt
732 * errno of failure
733 */
734 static int
735 handle_rtm_get(struct rt_addrinfo *info, u_int fibnum,
736 struct rt_msghdr *rtm, struct rib_cmd_info *rc)
737 {
738 RIB_RLOCK_TRACKER;
739 struct rib_head *rnh;
740 struct nhop_object *nh;
741 sa_family_t saf;
742
743 saf = info->rti_info[RTAX_DST]->sa_family;
744
745 rnh = rt_tables_get_rnh(fibnum, saf);
746 if (rnh == NULL)
747 return (EAFNOSUPPORT);
748
749 RIB_RLOCK(rnh);
750
751 /*
752 * By (implicit) convention host route (one without netmask)
753 * means longest-prefix-match request and the route with netmask
754 * means exact-match lookup.
755 * As cleanup_xaddrs() cleans up info flags&addrs for the /32,/128
756 * prefixes, use original data to check for the netmask presence.
757 */
758 if ((rtm->rtm_addrs & RTA_NETMASK) == 0) {
759 /*
760 * Provide longest prefix match for
761 * address lookup (no mask).
762 * 'route -n get addr'
763 */
764 rc->rc_rt = (struct rtentry *) rnh->rnh_matchaddr(
765 info->rti_info[RTAX_DST], &rnh->head);
766 } else
767 rc->rc_rt = (struct rtentry *) rnh->rnh_lookup(
768 info->rti_info[RTAX_DST],
769 info->rti_info[RTAX_NETMASK], &rnh->head);
770
771 if (rc->rc_rt == NULL) {
772 RIB_RUNLOCK(rnh);
773 return (ESRCH);
774 }
775
776 nh = select_nhop(rt_get_raw_nhop(rc->rc_rt), info->rti_info[RTAX_GATEWAY]);
777 if (nh == NULL) {
778 RIB_RUNLOCK(rnh);
779 return (ESRCH);
780 }
781 /*
782 * If performing proxied L2 entry insertion, and
783 * the actual PPP host entry is found, perform
784 * another search to retrieve the prefix route of
785 * the local end point of the PPP link.
786 * TODO: move this logic to userland.
787 */
788 if (rtm->rtm_flags & RTF_ANNOUNCE) {
789 struct sockaddr_storage laddr;
790
791 if (nh->nh_ifp != NULL &&
792 nh->nh_ifp->if_type == IFT_PROPVIRTUAL) {
793 struct ifaddr *ifa;
794
795 ifa = ifa_ifwithnet(info->rti_info[RTAX_DST], 1,
796 RT_ALL_FIBS);
797 if (ifa != NULL)
798 rt_maskedcopy(ifa->ifa_addr,
799 (struct sockaddr *)&laddr,
800 ifa->ifa_netmask);
801 } else
802 rt_maskedcopy(nh->nh_ifa->ifa_addr,
803 (struct sockaddr *)&laddr,
804 nh->nh_ifa->ifa_netmask);
805 /*
806 * refactor rt and no lock operation necessary
807 */
808 rc->rc_rt = (struct rtentry *)rnh->rnh_matchaddr(
809 (struct sockaddr *)&laddr, &rnh->head);
810 if (rc->rc_rt == NULL) {
811 RIB_RUNLOCK(rnh);
812 return (ESRCH);
813 }
814 nh = select_nhop(rt_get_raw_nhop(rc->rc_rt), info->rti_info[RTAX_GATEWAY]);
815 if (nh == NULL) {
816 RIB_RUNLOCK(rnh);
817 return (ESRCH);
818 }
819 }
820 rc->rc_nh_new = nh;
821 rc->rc_nh_weight = rc->rc_rt->rt_weight;
822 RIB_RUNLOCK(rnh);
823
824 return (0);
825 }
826
/*
 * Zero @dst and @mask and stamp both with the length and family of
 * @family's sockaddr type.  Only AF_INET and AF_INET6 (when compiled
 * in) are handled; for any other family both buffers are left
 * untouched.
 */
static void
init_sockaddrs_family(int family, struct sockaddr *dst, struct sockaddr *mask)
{
	switch (family) {
#ifdef INET
	case AF_INET: {
		struct sockaddr_in *d4 = (struct sockaddr_in *)dst;
		struct sockaddr_in *m4 = (struct sockaddr_in *)mask;

		bzero(d4, sizeof(struct sockaddr_in));
		d4->sin_len = sizeof(struct sockaddr_in);
		d4->sin_family = AF_INET;

		bzero(m4, sizeof(struct sockaddr_in));
		m4->sin_len = sizeof(struct sockaddr_in);
		m4->sin_family = AF_INET;
		break;
	}
#endif
#ifdef INET6
	case AF_INET6: {
		struct sockaddr_in6 *d6 = (struct sockaddr_in6 *)dst;
		struct sockaddr_in6 *m6 = (struct sockaddr_in6 *)mask;

		bzero(d6, sizeof(struct sockaddr_in6));
		d6->sin6_len = sizeof(struct sockaddr_in6);
		d6->sin6_family = AF_INET6;

		bzero(m6, sizeof(struct sockaddr_in6));
		m6->sin6_len = sizeof(struct sockaddr_in6);
		m6->sin6_family = AF_INET6;
		break;
	}
#endif
	default:
		break;
	}
}
859
/*
 * Copy the prefix and mask of @rt into the pre-initialised sockaddrs
 * @dst and @mask.  The family is taken from @dst->sa_family; for
 * AF_INET6 the scope id obtained from the route is also stored in
 * @dst.  Other families are left untouched.
 */
static void
export_rtaddrs(const struct rtentry *rt, struct sockaddr *dst,
    struct sockaddr *mask)
{
	uint32_t scope = 0;

	switch (dst->sa_family) {
#ifdef INET
	case AF_INET: {
		struct sockaddr_in *d4 = (struct sockaddr_in *)dst;
		struct sockaddr_in *m4 = (struct sockaddr_in *)mask;

		rt_get_inet_prefix_pmask(rt, &d4->sin_addr, &m4->sin_addr,
		    &scope);
		break;
	}
#endif
#ifdef INET6
	case AF_INET6: {
		struct sockaddr_in6 *d6 = (struct sockaddr_in6 *)dst;
		struct sockaddr_in6 *m6 = (struct sockaddr_in6 *)mask;

		rt_get_inet6_prefix_pmask(rt, &d6->sin6_addr,
		    &m6->sin6_addr, &scope);
		d6->sin6_scope_id = scope;
		break;
	}
#endif
	default:
		break;
	}
}
886
887 static int
888 update_rtm_from_info(struct rt_addrinfo *info, struct rt_msghdr **prtm,
889 int alloc_len)
890 {
891 struct rt_msghdr *rtm, *orig_rtm = NULL;
892 struct walkarg w;
893 int len;
894
895 rtm = *prtm;
896 /* Check if we need to realloc storage */
897 rtsock_msg_buffer(rtm->rtm_type, info, NULL, &len);
898 if (len > alloc_len) {
899 struct rt_msghdr *tmp_rtm;
900
901 tmp_rtm = malloc(len, M_TEMP, M_NOWAIT);
902 if (tmp_rtm == NULL)
903 return (ENOBUFS);
904 bcopy(rtm, tmp_rtm, rtm->rtm_msglen);
905 orig_rtm = rtm;
906 rtm = tmp_rtm;
907 alloc_len = len;
908
909 /*
910 * Delay freeing original rtm as info contains
911 * data referencing it.
912 */
913 }
914
915 w.w_tmem = (caddr_t)rtm;
916 w.w_tmemsize = alloc_len;
917 rtsock_msg_buffer(rtm->rtm_type, info, &w, &len);
918 rtm->rtm_addrs = info->rti_addrs;
919
920 if (orig_rtm != NULL)
921 free(orig_rtm, M_TEMP);
922 *prtm = rtm;
923 return (0);
924 }
925
926
927 /*
928 * Update sockaddrs, flags, etc in @prtm based on @rc data.
929 * rtm can be reallocated.
930 *
931 * Returns 0 on success, along with pointer to (potentially reallocated)
932 * rtm.
933 *
934 */
935 static int
936 update_rtm_from_rc(struct rt_addrinfo *info, struct rt_msghdr **prtm,
937 int alloc_len, struct rib_cmd_info *rc, struct nhop_object *nh)
938 {
939 union sockaddr_union saun;
940 struct rt_msghdr *rtm;
941 struct ifnet *ifp;
942 int error;
943
944 rtm = *prtm;
945 union sockaddr_union sa_dst, sa_mask;
946 int family = info->rti_info[RTAX_DST]->sa_family;
947 init_sockaddrs_family(family, &sa_dst.sa, &sa_mask.sa);
948 export_rtaddrs(rc->rc_rt, &sa_dst.sa, &sa_mask.sa);
949
950 info->rti_info[RTAX_DST] = &sa_dst.sa;
951 info->rti_info[RTAX_NETMASK] = rt_is_host(rc->rc_rt) ? NULL : &sa_mask.sa;
952 info->rti_info[RTAX_GATEWAY] = &nh->gw_sa;
953 info->rti_info[RTAX_GENMASK] = 0;
954 ifp = nh->nh_ifp;
955 if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
956 if (ifp) {
957 info->rti_info[RTAX_IFP] =
958 ifp->if_addr->ifa_addr;
959 error = rtm_get_jailed(info, ifp, nh,
960 &saun, curthread->td_ucred);
961 if (error != 0)
962 return (error);
963 if (ifp->if_flags & IFF_POINTOPOINT)
964 info->rti_info[RTAX_BRD] =
965 nh->nh_ifa->ifa_dstaddr;
966 rtm->rtm_index = ifp->if_index;
967 } else {
968 info->rti_info[RTAX_IFP] = NULL;
969 info->rti_info[RTAX_IFA] = NULL;
970 }
971 } else if (ifp != NULL)
972 rtm->rtm_index = ifp->if_index;
973
974 if ((error = update_rtm_from_info(info, prtm, alloc_len)) != 0)
975 return (error);
976
977 rtm = *prtm;
978 rtm->rtm_flags = rc->rc_rt->rte_flags | nhop_get_rtflags(nh);
979 if (rtm->rtm_flags & RTF_GWFLAG_COMPAT)
980 rtm->rtm_flags = RTF_GATEWAY |
981 (rtm->rtm_flags & ~RTF_GWFLAG_COMPAT);
982 rt_getmetrics(rc->rc_rt, nh, &rtm->rtm_rmx);
983 rtm->rtm_rmx.rmx_weight = rc->rc_nh_weight;
984
985 return (0);
986 }
987
988 #ifdef ROUTE_MPATH
989 static void
990 save_del_notification(const struct rib_cmd_info *rc, void *_cbdata)
991 {
992 struct rib_cmd_info *rc_new = (struct rib_cmd_info *)_cbdata;
993
994 if (rc->rc_cmd == RTM_DELETE)
995 *rc_new = *rc;
996 }
997
998 static void
999 save_add_notification(const struct rib_cmd_info *rc, void *_cbdata)
1000 {
1001 struct rib_cmd_info *rc_new = (struct rib_cmd_info *)_cbdata;
1002
1003 if (rc->rc_cmd == RTM_ADD)
1004 *rc_new = *rc;
1005 }
1006 #endif
1007
1008 #if defined(INET6) || defined(INET)
1009 static struct sockaddr *
1010 alloc_sockaddr_aligned(struct linear_buffer *lb, int len)
1011 {
1012 len = roundup2(len, sizeof(uint64_t));
1013 if (lb->offset + len > lb->size)
1014 return (NULL);
1015 struct sockaddr *sa = (struct sockaddr *)(lb->base + lb->offset);
1016 lb->offset += len;
1017 return (sa);
1018 }
1019 #endif
1020
1021 static int
1022 rts_send(struct socket *so, int flags, struct mbuf *m,
1023 struct sockaddr *nam, struct mbuf *control, struct thread *td)
1024 {
1025 struct rt_msghdr *rtm = NULL;
1026 struct rt_addrinfo info;
1027 struct epoch_tracker et;
1028 #ifdef INET6
1029 struct sockaddr_storage ss;
1030 struct sockaddr_in6 *sin6;
1031 int i, rti_need_deembed = 0;
1032 #endif
1033 int alloc_len = 0, len, error = 0, fibnum;
1034 sa_family_t saf = AF_UNSPEC;
1035 struct rib_cmd_info rc;
1036 struct nhop_object *nh;
1037
1038 if ((flags & PRUS_OOB) || control != NULL) {
1039 m_freem(m);
1040 if (control != NULL)
1041 m_freem(control);
1042 return (EOPNOTSUPP);
1043 }
1044
1045 fibnum = so->so_fibnum;
1046 #define senderr(e) { error = e; goto flush;}
1047 if (m == NULL || ((m->m_len < sizeof(long)) &&
1048 (m = m_pullup(m, sizeof(long))) == NULL))
1049 return (ENOBUFS);
1050 if ((m->m_flags & M_PKTHDR) == 0)
1051 panic("route_output");
1052 NET_EPOCH_ENTER(et);
1053 len = m->m_pkthdr.len;
1054 if (len < sizeof(*rtm) ||
1055 len != mtod(m, struct rt_msghdr *)->rtm_msglen)
1056 senderr(EINVAL);
1057
1058 /*
1059 * Most of current messages are in range 200-240 bytes,
1060 * minimize possible re-allocation on reply using larger size
1061 * buffer aligned on 1k boundary.
1062 */
1063 alloc_len = roundup2(len, 1024);
1064 int total_len = alloc_len + SCRATCH_BUFFER_SIZE;
1065 if ((rtm = malloc(total_len, M_TEMP, M_NOWAIT)) == NULL)
1066 senderr(ENOBUFS);
1067
1068 m_copydata(m, 0, len, (caddr_t)rtm);
1069 bzero(&info, sizeof(info));
1070 nh = NULL;
1071 struct linear_buffer lb = {
1072 .base = (char *)rtm + alloc_len,
1073 .size = SCRATCH_BUFFER_SIZE,
1074 };
1075
1076 if (rtm->rtm_version != RTM_VERSION) {
1077 /* Do not touch message since format is unknown */
1078 free(rtm, M_TEMP);
1079 rtm = NULL;
1080 senderr(EPROTONOSUPPORT);
1081 }
1082
1083 /*
1084 * Starting from here, it is possible
1085 * to alter original message and insert
1086 * caller PID and error value.
1087 */
1088
1089 if ((error = fill_addrinfo(rtm, len, &lb, fibnum, &info)) != 0) {
1090 senderr(error);
1091 }
1092 /* fill_addrinfo() embeds scope into IPv6 addresses */
1093 #ifdef INET6
1094 rti_need_deembed = 1;
1095 #endif
1096
1097 saf = info.rti_info[RTAX_DST]->sa_family;
1098
1099 /* support for new ARP code */
1100 if (rtm->rtm_flags & RTF_LLDATA) {
1101 error = lla_rt_output(rtm, &info);
1102 goto flush;
1103 }
1104
1105 union sockaddr_union gw_saun;
1106 int blackhole_flags = rtm->rtm_flags & (RTF_BLACKHOLE|RTF_REJECT);
1107 if (blackhole_flags != 0) {
1108 if (blackhole_flags != (RTF_BLACKHOLE | RTF_REJECT))
1109 error = fill_blackholeinfo(&info, &gw_saun);
1110 else {
1111 RTS_PID_LOG(LOG_DEBUG, "both BLACKHOLE and REJECT flags specifiied");
1112 error = EINVAL;
1113 }
1114 if (error != 0)
1115 senderr(error);
1116 }
1117
1118 switch (rtm->rtm_type) {
1119 case RTM_ADD:
1120 case RTM_CHANGE:
1121 if (rtm->rtm_type == RTM_ADD) {
1122 if (info.rti_info[RTAX_GATEWAY] == NULL) {
1123 RTS_PID_LOG(LOG_DEBUG, "RTM_ADD w/o gateway");
1124 senderr(EINVAL);
1125 }
1126 }
1127 error = rib_action(fibnum, rtm->rtm_type, &info, &rc);
1128 if (error == 0) {
1129 rtsock_notify_event(fibnum, &rc);
1130 #ifdef ROUTE_MPATH
1131 if (NH_IS_NHGRP(rc.rc_nh_new) ||
1132 (rc.rc_nh_old && NH_IS_NHGRP(rc.rc_nh_old))) {
1133 struct rib_cmd_info rc_simple = {};
1134 rib_decompose_notification(&rc,
1135 save_add_notification, (void *)&rc_simple);
1136 rc = rc_simple;
1137 }
1138 #endif
1139 /* nh MAY be empty if RTM_CHANGE request is no-op */
1140 nh = rc.rc_nh_new;
1141 if (nh != NULL) {
1142 rtm->rtm_index = nh->nh_ifp->if_index;
1143 rtm->rtm_flags = rc.rc_rt->rte_flags | nhop_get_rtflags(nh);
1144 }
1145 }
1146 break;
1147
1148 case RTM_DELETE:
1149 error = rib_action(fibnum, RTM_DELETE, &info, &rc);
1150 if (error == 0) {
1151 rtsock_notify_event(fibnum, &rc);
1152 #ifdef ROUTE_MPATH
1153 if (NH_IS_NHGRP(rc.rc_nh_old) ||
1154 (rc.rc_nh_new && NH_IS_NHGRP(rc.rc_nh_new))) {
1155 struct rib_cmd_info rc_simple = {};
1156 rib_decompose_notification(&rc,
1157 save_del_notification, (void *)&rc_simple);
1158 rc = rc_simple;
1159 }
1160 #endif
1161 nh = rc.rc_nh_old;
1162 }
1163 break;
1164
1165 case RTM_GET:
1166 error = handle_rtm_get(&info, fibnum, rtm, &rc);
1167 if (error != 0)
1168 senderr(error);
1169 nh = rc.rc_nh_new;
1170
1171 if (!can_export_rte(curthread->td_ucred,
1172 info.rti_info[RTAX_NETMASK] == NULL,
1173 info.rti_info[RTAX_DST])) {
1174 senderr(ESRCH);
1175 }
1176 break;
1177
1178 default:
1179 senderr(EOPNOTSUPP);
1180 }
1181
1182 if (error == 0 && nh != NULL) {
1183 error = update_rtm_from_rc(&info, &rtm, alloc_len, &rc, nh);
1184 /*
1185 * Note that some sockaddr pointers may have changed to
1186 * point to memory outside @rtm. Some may be pointing
1187 * to the on-stack variables.
1188 * Given that, any pointer in @info CANNOT BE USED.
1189 */
1190
1191 /*
1192 * scopeid deembedding has been performed while
1193 * writing updated rtm in rtsock_msg_buffer().
1194 * With that in mind, skip deembedding procedure below.
1195 */
1196 #ifdef INET6
1197 rti_need_deembed = 0;
1198 #endif
1199 }
1200
1201 flush:
1202 NET_EPOCH_EXIT(et);
1203
1204 #ifdef INET6
1205 if (rtm != NULL) {
1206 if (rti_need_deembed) {
1207 /* sin6_scope_id is recovered before sending rtm. */
1208 sin6 = (struct sockaddr_in6 *)&ss;
1209 for (i = 0; i < RTAX_MAX; i++) {
1210 if (info.rti_info[i] == NULL)
1211 continue;
1212 if (info.rti_info[i]->sa_family != AF_INET6)
1213 continue;
1214 bcopy(info.rti_info[i], sin6, sizeof(*sin6));
1215 if (sa6_recoverscope(sin6) == 0)
1216 bcopy(sin6, info.rti_info[i],
1217 sizeof(*sin6));
1218 }
1219 if (update_rtm_from_info(&info, &rtm, alloc_len) != 0) {
1220 if (error != 0)
1221 error = ENOBUFS;
1222 }
1223 }
1224 }
1225 #endif
1226 send_rtm_reply(so, rtm, m, saf, fibnum, error);
1227
1228 return (error);
1229 }
1230
1231 /*
1232 * Sends the prepared reply message in @rtm to all rtsock clients.
1233 * Frees @m and @rtm.
1234 *
1235 */
1236 static void
1237 send_rtm_reply(struct socket *so, struct rt_msghdr *rtm, struct mbuf *m,
1238 sa_family_t saf, u_int fibnum, int rtm_errno)
1239 {
1240 struct rcb *rcb = NULL;
1241
1242 /*
1243 * Check to see if we don't want our own messages.
1244 */
1245 if ((so->so_options & SO_USELOOPBACK) == 0) {
1246 if (V_route_cb.any_count <= 1) {
1247 if (rtm != NULL)
1248 free(rtm, M_TEMP);
1249 m_freem(m);
1250 return;
1251 }
1252 /* There is another listener, so construct message */
1253 rcb = so->so_pcb;
1254 }
1255
1256 if (rtm != NULL) {
1257 if (rtm_errno!= 0)
1258 rtm->rtm_errno = rtm_errno;
1259 else
1260 rtm->rtm_flags |= RTF_DONE;
1261
1262 m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
1263 if (m->m_pkthdr.len < rtm->rtm_msglen) {
1264 m_freem(m);
1265 m = NULL;
1266 } else if (m->m_pkthdr.len > rtm->rtm_msglen)
1267 m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
1268
1269 free(rtm, M_TEMP);
1270 }
1271 if (m != NULL) {
1272 M_SETFIB(m, fibnum);
1273 m->m_flags |= RTS_FILTER_FIB;
1274 if (rcb) {
1275 /*
1276 * XXX insure we don't get a copy by
1277 * invalidating our protocol
1278 */
1279 sa_family_t family = rcb->rcb_family;
1280 rcb->rcb_family = AF_UNSPEC;
1281 rt_dispatch(m, saf);
1282 rcb->rcb_family = family;
1283 } else
1284 rt_dispatch(m, saf);
1285 }
1286 }
1287
1288 static void
1289 rt_getmetrics(const struct rtentry *rt, const struct nhop_object *nh,
1290 struct rt_metrics *out)
1291 {
1292
1293 bzero(out, sizeof(*out));
1294 out->rmx_mtu = nh->nh_mtu;
1295 out->rmx_weight = rt->rt_weight;
1296 out->rmx_nhidx = nhop_get_idx(nh);
1297 /* Kernel -> userland timebase conversion. */
1298 out->rmx_expire = nhop_get_expire(nh) ?
1299 nhop_get_expire(nh) - time_uptime + time_second : 0;
1300 }
1301
1302 /*
1303 * Extract the addresses of the passed sockaddrs.
1304 * Do a little sanity checking so as to avoid bad memory references.
1305 * This data is derived straight from userland.
1306 */
1307 static int
1308 rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
1309 {
1310 struct sockaddr *sa;
1311 int i;
1312
1313 for (i = 0; i < RTAX_MAX && cp < cplim; i++) {
1314 if ((rtinfo->rti_addrs & (1 << i)) == 0)
1315 continue;
1316 sa = (struct sockaddr *)cp;
1317 /*
1318 * It won't fit.
1319 */
1320 if (cp + sa->sa_len > cplim) {
1321 RTS_PID_LOG(LOG_DEBUG, "sa_len too big for sa type %d", i);
1322 return (EINVAL);
1323 }
1324 /*
1325 * there are no more.. quit now
1326 * If there are more bits, they are in error.
1327 * I've seen this. route(1) can evidently generate these.
1328 * This causes kernel to core dump.
1329 * for compatibility, If we see this, point to a safe address.
1330 */
1331 if (sa->sa_len == 0) {
1332 rtinfo->rti_info[i] = &sa_zero;
1333 return (0); /* should be EINVAL but for compat */
1334 }
1335 /* accept it */
1336 #ifdef INET6
1337 if (sa->sa_family == AF_INET6)
1338 sa6_embedscope((struct sockaddr_in6 *)sa,
1339 V_ip6_use_defzone);
1340 #endif
1341 rtinfo->rti_info[i] = sa;
1342 cp += SA_SIZE(sa);
1343 }
1344 return (0);
1345 }
1346
#ifdef INET
/*
 * Overwrites @sin with a full-size AF_INET sockaddr carrying @addr.
 * Members other than length, family and address end up zeroed.
 */
static inline void
fill_sockaddr_inet(struct sockaddr_in *sin, struct in_addr addr)
{

	*sin = (struct sockaddr_in){
		.sin_len = sizeof(struct sockaddr_in),
		.sin_family = AF_INET,
		.sin_addr = addr,
	};
}
#endif
1360
#ifdef INET6
/*
 * Overwrites @sin6 with a full-size AF_INET6 sockaddr carrying @addr6
 * and @scopeid.  Members not listed below end up zeroed.
 *
 * @addr6 may point into @sin6 itself: the new value is assembled in a
 * separate temporary object before @sin6 is written.
 */
static inline void
fill_sockaddr_inet6(struct sockaddr_in6 *sin6, const struct in6_addr *addr6,
    uint32_t scopeid)
{

	*sin6 = (struct sockaddr_in6){
		.sin6_len = sizeof(struct sockaddr_in6),
		.sin6_family = AF_INET6,
		.sin6_addr = *addr6,
		.sin6_scope_id = scopeid,
	};
}
#endif
1376
1377 #if defined(INET6) || defined(INET)
1378 /*
1379 * Checks if gateway is suitable for lltable operations.
1380 * Lltable code requires AF_LINK gateway with ifindex
1381 * and mac address specified.
1382 * Returns 0 on success.
1383 */
1384 static int
1385 cleanup_xaddrs_lladdr(struct rt_addrinfo *info)
1386 {
1387 struct sockaddr_dl *sdl = (struct sockaddr_dl *)info->rti_info[RTAX_GATEWAY];
1388
1389 if (sdl->sdl_family != AF_LINK)
1390 return (EINVAL);
1391
1392 if (sdl->sdl_index == 0) {
1393 RTS_PID_LOG(LOG_DEBUG, "AF_LINK gateway w/o ifindex");
1394 return (EINVAL);
1395 }
1396
1397 if (offsetof(struct sockaddr_dl, sdl_data) + sdl->sdl_nlen + sdl->sdl_alen > sdl->sdl_len) {
1398 RTS_PID_LOG(LOG_DEBUG, "AF_LINK gw: sdl_nlen/sdl_alen too large");
1399 return (EINVAL);
1400 }
1401
1402 return (0);
1403 }
1404
1405 static int
1406 cleanup_xaddrs_gateway(struct rt_addrinfo *info, struct linear_buffer *lb)
1407 {
1408 struct sockaddr *gw = info->rti_info[RTAX_GATEWAY];
1409 struct sockaddr *sa;
1410
1411 if (info->rti_flags & RTF_LLDATA)
1412 return (cleanup_xaddrs_lladdr(info));
1413
1414 switch (gw->sa_family) {
1415 #ifdef INET
1416 case AF_INET:
1417 {
1418 struct sockaddr_in *gw_sin = (struct sockaddr_in *)gw;
1419
1420 /* Ensure reads do not go beyoud SA boundary */
1421 if (SA_SIZE(gw) < offsetof(struct sockaddr_in, sin_zero)) {
1422 RTS_PID_LOG(LOG_DEBUG, "gateway sin_len too small: %d",
1423 gw->sa_len);
1424 return (EINVAL);
1425 }
1426 sa = alloc_sockaddr_aligned(lb, sizeof(struct sockaddr_in));
1427 if (sa == NULL)
1428 return (ENOBUFS);
1429 fill_sockaddr_inet((struct sockaddr_in *)sa, gw_sin->sin_addr);
1430 info->rti_info[RTAX_GATEWAY] = sa;
1431 }
1432 break;
1433 #endif
1434 #ifdef INET6
1435 case AF_INET6:
1436 {
1437 struct sockaddr_in6 *gw_sin6 = (struct sockaddr_in6 *)gw;
1438 if (gw_sin6->sin6_len < sizeof(struct sockaddr_in6)) {
1439 RTS_PID_LOG(LOG_DEBUG, "gateway sin6_len too small: %d",
1440 gw->sa_len);
1441 return (EINVAL);
1442 }
1443 fill_sockaddr_inet6(gw_sin6, &gw_sin6->sin6_addr, 0);
1444 break;
1445 }
1446 #endif
1447 case AF_LINK:
1448 {
1449 struct sockaddr_dl *gw_sdl;
1450
1451 size_t sdl_min_len = offsetof(struct sockaddr_dl, sdl_data);
1452 gw_sdl = (struct sockaddr_dl *)gw;
1453 if (gw_sdl->sdl_len < sdl_min_len) {
1454 RTS_PID_LOG(LOG_DEBUG, "gateway sdl_len too small: %d",
1455 gw_sdl->sdl_len);
1456 return (EINVAL);
1457 }
1458 sa = alloc_sockaddr_aligned(lb, sizeof(struct sockaddr_dl_short));
1459 if (sa == NULL)
1460 return (ENOBUFS);
1461
1462 const struct sockaddr_dl_short sdl = {
1463 .sdl_family = AF_LINK,
1464 .sdl_len = sizeof(struct sockaddr_dl_short),
1465 .sdl_index = gw_sdl->sdl_index,
1466 };
1467 *((struct sockaddr_dl_short *)sa) = sdl;
1468 info->rti_info[RTAX_GATEWAY] = sa;
1469 break;
1470 }
1471 }
1472
1473 return (0);
1474 }
1475 #endif
1476
1477 static void
1478 remove_netmask(struct rt_addrinfo *info)
1479 {
1480 info->rti_info[RTAX_NETMASK] = NULL;
1481 info->rti_flags |= RTF_HOST;
1482 info->rti_addrs &= ~RTA_NETMASK;
1483 }
1484
#ifdef INET
/*
 * Validates and normalizes the IPv4 destination/netmask pair of @info.
 *
 * The destination is masked with the netmask and both are replaced by
 * clean sockaddr_in copies allocated from @lb.  A missing or all-ones
 * netmask turns the request into a host route.  The gateway, if any, is
 * then passed to cleanup_xaddrs_gateway().
 *
 * Returns 0 on success, EINVAL on a malformed sockaddr and ENOBUFS if
 * @lb has no room left.
 */
static int
cleanup_xaddrs_inet(struct rt_addrinfo *info, struct linear_buffer *lb)
{
	struct sockaddr_in *dst_sin, *mask_sin;
	struct in_addr dst, mask;

	dst_sin = (struct sockaddr_in *)info->rti_info[RTAX_DST];
	mask_sin = (struct sockaddr_in *)info->rti_info[RTAX_NETMASK];

	/* Ensure reads do not go beyond the buffer size. */
	if (SA_SIZE(dst_sin) < offsetof(struct sockaddr_in, sin_zero)) {
		RTS_PID_LOG(LOG_DEBUG, "prefix dst sin_len too small: %d",
		    dst_sin->sin_len);
		return (EINVAL);
	}

	if (mask_sin == NULL) {
		/* No netmask supplied: treat as a host mask. */
		mask.s_addr = INADDR_BROADCAST;
	} else if (mask_sin->sin_len >= sizeof(struct sockaddr_in)) {
		mask.s_addr = mask_sin->sin_addr.s_addr;
	} else {
		/*
		 * Some older routing software encodes the mask length into
		 * sin_len, which results in a "truncated" sockaddr.  Copy
		 * the bytes that are present and zero-fill the rest.
		 */
		int avail = mask_sin->sin_len -
		    offsetof(struct sockaddr_in, sin_addr);

		if (avail < 0) {
			RTS_PID_LOG(LOG_DEBUG, "prefix mask sin_len too small: %d",
			    mask_sin->sin_len);
			return (EINVAL);
		}
		if (avail > sizeof(struct in_addr))
			avail = sizeof(struct in_addr);
		mask.s_addr = 0;
		memcpy(&mask, &mask_sin->sin_addr, avail);
	}

	dst.s_addr = htonl(ntohl(dst_sin->sin_addr.s_addr) & ntohl(mask.s_addr));

	/* Construct new "clean" dst/mask sockaddrs. */
	dst_sin = (struct sockaddr_in *)alloc_sockaddr_aligned(lb,
	    sizeof(struct sockaddr_in));
	if (dst_sin == NULL)
		return (ENOBUFS);
	fill_sockaddr_inet(dst_sin, dst);
	info->rti_info[RTAX_DST] = (struct sockaddr *)dst_sin;

	if (mask.s_addr == INADDR_BROADCAST) {
		remove_netmask(info);
	} else {
		mask_sin = (struct sockaddr_in *)alloc_sockaddr_aligned(lb,
		    sizeof(struct sockaddr_in));
		if (mask_sin == NULL)
			return (ENOBUFS);
		fill_sockaddr_inet(mask_sin, mask);
		info->rti_info[RTAX_NETMASK] = (struct sockaddr *)mask_sin;
		info->rti_flags &= ~RTF_HOST;
	}

	/* Check gateway. */
	if (info->rti_info[RTAX_GATEWAY] == NULL)
		return (0);

	return (cleanup_xaddrs_gateway(info, lb));
}
#endif
1547
#ifdef INET6
/*
 * Validates and normalizes the IPv6 destination/netmask pair of @info.
 *
 * The destination address is masked in place with the netmask, then
 * both are replaced by clean sockaddr_in6 copies (scope id 0) allocated
 * from @lb.  A missing or /128 netmask turns the request into a host
 * route.  The gateway, if any, is then passed to
 * cleanup_xaddrs_gateway().
 *
 * Returns 0 on success, EINVAL on a malformed sockaddr and ENOBUFS if
 * @lb has no room left.
 */
static int
cleanup_xaddrs_inet6(struct rt_addrinfo *info, struct linear_buffer *lb)
{
	struct sockaddr_in6 *dst_sin6, *mask_sin6;
	struct sockaddr *clean;
	struct in6_addr mask, *dst;

	dst_sin6 = (struct sockaddr_in6 *)info->rti_info[RTAX_DST];
	mask_sin6 = (struct sockaddr_in6 *)info->rti_info[RTAX_NETMASK];

	if (dst_sin6->sin6_len < sizeof(struct sockaddr_in6)) {
		RTS_PID_LOG(LOG_DEBUG, "prefix dst sin6_len too small: %d",
		    dst_sin6->sin6_len);
		return (EINVAL);
	}

	if (mask_sin6 == NULL) {
		/* No netmask supplied: treat as a host mask. */
		mask = in6mask128;
	} else if (mask_sin6->sin6_len >= sizeof(struct sockaddr_in6)) {
		mask = mask_sin6->sin6_addr;
	} else {
		/*
		 * Some older routing software encodes the mask length into
		 * sin6_len, which results in a "truncated" sockaddr.  Copy
		 * the bytes that are present and zero-fill the rest.
		 */
		int avail = mask_sin6->sin6_len -
		    offsetof(struct sockaddr_in6, sin6_addr);

		if (avail < 0) {
			RTS_PID_LOG(LOG_DEBUG, "rtsock: prefix mask sin6_len too small: %d",
			    mask_sin6->sin6_len);
			return (EINVAL);
		}
		if (avail > sizeof(struct in6_addr))
			avail = sizeof(struct in6_addr);
		bzero(&mask, sizeof(mask));
		memcpy(&mask, &mask_sin6->sin6_addr, avail);
	}

	/* Note: this masks the address inside the caller-supplied sockaddr. */
	dst = &dst_sin6->sin6_addr;
	IN6_MASK_ADDR(dst, &mask);

	clean = alloc_sockaddr_aligned(lb, sizeof(struct sockaddr_in6));
	if (clean == NULL)
		return (ENOBUFS);
	fill_sockaddr_inet6((struct sockaddr_in6 *)clean, dst, 0);
	info->rti_info[RTAX_DST] = clean;

	if (IN6_ARE_ADDR_EQUAL(&mask, &in6mask128)) {
		remove_netmask(info);
	} else {
		clean = alloc_sockaddr_aligned(lb, sizeof(struct sockaddr_in6));
		if (clean == NULL)
			return (ENOBUFS);
		fill_sockaddr_inet6((struct sockaddr_in6 *)clean, &mask, 0);
		info->rti_info[RTAX_NETMASK] = clean;
		info->rti_flags &= ~RTF_HOST;
	}

	/* Check gateway. */
	if (info->rti_info[RTAX_GATEWAY] == NULL)
		return (0);

	return (cleanup_xaddrs_gateway(info, lb));
}
#endif
1610
1611 static int
1612 cleanup_xaddrs(struct rt_addrinfo *info, struct linear_buffer *lb)
1613 {
1614 int error = EAFNOSUPPORT;
1615
1616 if (info->rti_info[RTAX_DST] == NULL) {
1617 RTS_PID_LOG(LOG_DEBUG, "prefix dst is not set");
1618 return (EINVAL);
1619 }
1620
1621 if (info->rti_flags & RTF_LLDATA) {
1622 /*
1623 * arp(8)/ndp(8) sends RTA_NETMASK for the associated
1624 * prefix along with the actual address in RTA_DST.
1625 * Remove netmask to avoid unnecessary address masking.
1626 */
1627 remove_netmask(info);
1628 }
1629
1630 switch (info->rti_info[RTAX_DST]->sa_family) {
1631 #ifdef INET
1632 case AF_INET:
1633 error = cleanup_xaddrs_inet(info, lb);
1634 break;
1635 #endif
1636 #ifdef INET6
1637 case AF_INET6:
1638 error = cleanup_xaddrs_inet6(info, lb);
1639 break;
1640 #endif
1641 }
1642
1643 return (error);
1644 }
1645
1646 /*
1647 * Fill in @dmask with valid netmask leaving original @smask
1648 * intact. Mostly used with radix netmasks.
1649 */
1650 struct sockaddr *
1651 rtsock_fix_netmask(const struct sockaddr *dst, const struct sockaddr *smask,
1652 struct sockaddr_storage *dmask)
1653 {
1654 if (dst == NULL || smask == NULL)
1655 return (NULL);
1656
1657 memset(dmask, 0, dst->sa_len);
1658 memcpy(dmask, smask, smask->sa_len);
1659 dmask->ss_len = dst->sa_len;
1660 dmask->ss_family = dst->sa_family;
1661
1662 return ((struct sockaddr *)dmask);
1663 }
1664
1665 /*
1666 * Writes information related to @rtinfo object to newly-allocated mbuf.
1667 * Assumes MCLBYTES is enough to construct any message.
1668 * Used for OS notifications of vaious events (if/ifa announces,etc)
1669 *
1670 * Returns allocated mbuf or NULL on failure.
1671 */
1672 static struct mbuf *
1673 rtsock_msg_mbuf(int type, struct rt_addrinfo *rtinfo)
1674 {
1675 struct sockaddr_storage ss;
1676 struct rt_msghdr *rtm;
1677 struct mbuf *m;
1678 int i;
1679 struct sockaddr *sa;
1680 #ifdef INET6
1681 struct sockaddr_in6 *sin6;
1682 #endif
1683 int len, dlen;
1684
1685 switch (type) {
1686 case RTM_DELADDR:
1687 case RTM_NEWADDR:
1688 len = sizeof(struct ifa_msghdr);
1689 break;
1690
1691 case RTM_DELMADDR:
1692 case RTM_NEWMADDR:
1693 len = sizeof(struct ifma_msghdr);
1694 break;
1695
1696 case RTM_IFINFO:
1697 len = sizeof(struct if_msghdr);
1698 break;
1699
1700 case RTM_IFANNOUNCE:
1701 case RTM_IEEE80211:
1702 len = sizeof(struct if_announcemsghdr);
1703 break;
1704
1705 default:
1706 len = sizeof(struct rt_msghdr);
1707 }
1708
1709 /* XXXGL: can we use MJUMPAGESIZE cluster here? */
1710 KASSERT(len <= MCLBYTES, ("%s: message too big", __func__));
1711 if (len > MHLEN)
1712 m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
1713 else
1714 m = m_gethdr(M_NOWAIT, MT_DATA);
1715 if (m == NULL)
1716 return (m);
1717
1718 m->m_pkthdr.len = m->m_len = len;
1719 rtm = mtod(m, struct rt_msghdr *);
1720 bzero((caddr_t)rtm, len);
1721 for (i = 0; i < RTAX_MAX; i++) {
1722 if ((sa = rtinfo->rti_info[i]) == NULL)
1723 continue;
1724 rtinfo->rti_addrs |= (1 << i);
1725
1726 dlen = SA_SIZE(sa);
1727 KASSERT(dlen <= sizeof(ss),
1728 ("%s: sockaddr size overflow", __func__));
1729 bzero(&ss, sizeof(ss));
1730 bcopy(sa, &ss, sa->sa_len);
1731 sa = (struct sockaddr *)&ss;
1732 #ifdef INET6
1733 if (sa->sa_family == AF_INET6) {
1734 sin6 = (struct sockaddr_in6 *)sa;
1735 (void)sa6_recoverscope(sin6);
1736 }
1737 #endif
1738 m_copyback(m, len, dlen, (caddr_t)sa);
1739 len += dlen;
1740 }
1741 if (m->m_pkthdr.len != len) {
1742 m_freem(m);
1743 return (NULL);
1744 }
1745 rtm->rtm_msglen = len;
1746 rtm->rtm_version = RTM_VERSION;
1747 rtm->rtm_type = type;
1748 return (m);
1749 }
1750
1751 /*
1752 * Writes information related to @rtinfo object to preallocated buffer.
1753 * Stores needed size in @plen. If @w is NULL, calculates size without
1754 * writing.
1755 * Used for sysctl dumps and rtsock answers (RTM_DEL/RTM_GET) generation.
1756 *
1757 * Returns 0 on success.
1758 *
1759 */
1760 static int
1761 rtsock_msg_buffer(int type, struct rt_addrinfo *rtinfo, struct walkarg *w, int *plen)
1762 {
1763 struct sockaddr_storage ss;
1764 int len, buflen = 0, dlen, i;
1765 caddr_t cp = NULL;
1766 struct rt_msghdr *rtm = NULL;
1767 #ifdef INET6
1768 struct sockaddr_in6 *sin6;
1769 #endif
1770 #ifdef COMPAT_FREEBSD32
1771 bool compat32 = false;
1772 #endif
1773
1774 switch (type) {
1775 case RTM_DELADDR:
1776 case RTM_NEWADDR:
1777 if (w != NULL && w->w_op == NET_RT_IFLISTL) {
1778 #ifdef COMPAT_FREEBSD32
1779 if (w->w_req->flags & SCTL_MASK32) {
1780 len = sizeof(struct ifa_msghdrl32);
1781 compat32 = true;
1782 } else
1783 #endif
1784 len = sizeof(struct ifa_msghdrl);
1785 } else
1786 len = sizeof(struct ifa_msghdr);
1787 break;
1788
1789 case RTM_IFINFO:
1790 #ifdef COMPAT_FREEBSD32
1791 if (w != NULL && w->w_req->flags & SCTL_MASK32) {
1792 if (w->w_op == NET_RT_IFLISTL)
1793 len = sizeof(struct if_msghdrl32);
1794 else
1795 len = sizeof(struct if_msghdr32);
1796 compat32 = true;
1797 break;
1798 }
1799 #endif
1800 if (w != NULL && w->w_op == NET_RT_IFLISTL)
1801 len = sizeof(struct if_msghdrl);
1802 else
1803 len = sizeof(struct if_msghdr);
1804 break;
1805
1806 case RTM_NEWMADDR:
1807 len = sizeof(struct ifma_msghdr);
1808 break;
1809
1810 default:
1811 len = sizeof(struct rt_msghdr);
1812 }
1813
1814 if (w != NULL) {
1815 rtm = (struct rt_msghdr *)w->w_tmem;
1816 buflen = w->w_tmemsize - len;
1817 cp = (caddr_t)w->w_tmem + len;
1818 }
1819
1820 rtinfo->rti_addrs = 0;
1821 for (i = 0; i < RTAX_MAX; i++) {
1822 struct sockaddr *sa;
1823
1824 if ((sa = rtinfo->rti_info[i]) == NULL)
1825 continue;
1826 rtinfo->rti_addrs |= (1 << i);
1827 #ifdef COMPAT_FREEBSD32
1828 if (compat32)
1829 dlen = SA_SIZE32(sa);
1830 else
1831 #endif
1832 dlen = SA_SIZE(sa);
1833 if (cp != NULL && buflen >= dlen) {
1834 KASSERT(dlen <= sizeof(ss),
1835 ("%s: sockaddr size overflow", __func__));
1836 bzero(&ss, sizeof(ss));
1837 bcopy(sa, &ss, sa->sa_len);
1838 sa = (struct sockaddr *)&ss;
1839 #ifdef INET6
1840 if (sa->sa_family == AF_INET6) {
1841 sin6 = (struct sockaddr_in6 *)sa;
1842 (void)sa6_recoverscope(sin6);
1843 }
1844 #endif
1845 bcopy((caddr_t)sa, cp, (unsigned)dlen);
1846 cp += dlen;
1847 buflen -= dlen;
1848 } else if (cp != NULL) {
1849 /*
1850 * Buffer too small. Count needed size
1851 * and return with error.
1852 */
1853 cp = NULL;
1854 }
1855
1856 len += dlen;
1857 }
1858
1859 if (cp != NULL) {
1860 dlen = ALIGN(len) - len;
1861 if (buflen < dlen)
1862 cp = NULL;
1863 else {
1864 bzero(cp, dlen);
1865 cp += dlen;
1866 buflen -= dlen;
1867 }
1868 }
1869 len = ALIGN(len);
1870
1871 if (cp != NULL) {
1872 /* fill header iff buffer is large enough */
1873 rtm->rtm_version = RTM_VERSION;
1874 rtm->rtm_type = type;
1875 rtm->rtm_msglen = len;
1876 }
1877
1878 *plen = len;
1879
1880 if (w != NULL && cp == NULL)
1881 return (ENOBUFS);
1882
1883 return (0);
1884 }
1885
1886 /*
1887 * This routine is called to generate a message from the routing
1888 * socket indicating that a redirect has occurred, a routing lookup
1889 * has failed, or that a protocol has detected timeouts to a particular
1890 * destination.
1891 */
1892 void
1893 rt_missmsg_fib(int type, struct rt_addrinfo *rtinfo, int flags, int error,
1894 int fibnum)
1895 {
1896 struct rt_msghdr *rtm;
1897 struct mbuf *m;
1898 struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
1899
1900 if (V_route_cb.any_count == 0)
1901 return;
1902 m = rtsock_msg_mbuf(type, rtinfo);
1903 if (m == NULL)
1904 return;
1905
1906 if (fibnum != RT_ALL_FIBS) {
1907 KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: fibnum out "
1908 "of range 0 <= %d < %d", __func__, fibnum, rt_numfibs));
1909 M_SETFIB(m, fibnum);
1910 m->m_flags |= RTS_FILTER_FIB;
1911 }
1912
1913 rtm = mtod(m, struct rt_msghdr *);
1914 rtm->rtm_flags = RTF_DONE | flags;
1915 rtm->rtm_errno = error;
1916 rtm->rtm_addrs = rtinfo->rti_addrs;
1917 rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
1918 }
1919
1920 void
1921 rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
1922 {
1923
1924 rt_missmsg_fib(type, rtinfo, flags, error, RT_ALL_FIBS);
1925 }
1926
1927 /*
1928 * This routine is called to generate a message from the routing
1929 * socket indicating that the status of a network interface has changed.
1930 */
1931 static void
1932 rtsock_ifmsg(struct ifnet *ifp, int if_flags_mask __unused)
1933 {
1934 struct if_msghdr *ifm;
1935 struct mbuf *m;
1936 struct rt_addrinfo info;
1937
1938 if (V_route_cb.any_count == 0)
1939 return;
1940 bzero((caddr_t)&info, sizeof(info));
1941 m = rtsock_msg_mbuf(RTM_IFINFO, &info);
1942 if (m == NULL)
1943 return;
1944 ifm = mtod(m, struct if_msghdr *);
1945 ifm->ifm_index = ifp->if_index;
1946 ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
1947 if_data_copy(ifp, &ifm->ifm_data);
1948 ifm->ifm_addrs = 0;
1949 rt_dispatch(m, AF_UNSPEC);
1950 }
1951
1952 /*
1953 * Announce interface address arrival/withdraw.
1954 * Please do not call directly, use rt_addrmsg().
1955 * Assume input data to be valid.
1956 * Returns 0 on success.
1957 */
1958 int
1959 rtsock_addrmsg(int cmd, struct ifaddr *ifa, int fibnum)
1960 {
1961 struct rt_addrinfo info;
1962 struct sockaddr *sa;
1963 int ncmd;
1964 struct mbuf *m;
1965 struct ifa_msghdr *ifam;
1966 struct ifnet *ifp = ifa->ifa_ifp;
1967 struct sockaddr_storage ss;
1968
1969 if (V_route_cb.any_count == 0)
1970 return (0);
1971
1972 ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
1973
1974 bzero((caddr_t)&info, sizeof(info));
1975 info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
1976 info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr;
1977 info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(
1978 info.rti_info[RTAX_IFA], ifa->ifa_netmask, &ss);
1979 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1980 if ((m = rtsock_msg_mbuf(ncmd, &info)) == NULL)
1981 return (ENOBUFS);
1982 ifam = mtod(m, struct ifa_msghdr *);
1983 ifam->ifam_index = ifp->if_index;
1984 ifam->ifam_metric = ifa->ifa_ifp->if_metric;
1985 ifam->ifam_flags = ifa->ifa_flags;
1986 ifam->ifam_addrs = info.rti_addrs;
1987
1988 if (fibnum != RT_ALL_FIBS) {
1989 M_SETFIB(m, fibnum);
1990 m->m_flags |= RTS_FILTER_FIB;
1991 }
1992
1993 rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
1994
1995 return (0);
1996 }
1997
1998 /*
1999 * Announce route addition/removal to rtsock based on @rt data.
2000 * Callers are advives to use rt_routemsg() instead of using this
2001 * function directly.
2002 * Assume @rt data is consistent.
2003 *
2004 * Returns 0 on success.
2005 */
2006 int
2007 rtsock_routemsg(int cmd, struct rtentry *rt, struct nhop_object *nh,
2008 int fibnum)
2009 {
2010 union sockaddr_union dst, mask;
2011 struct rt_addrinfo info;
2012
2013 if (V_route_cb.any_count == 0)
2014 return (0);
2015
2016 int family = rt_get_family(rt);
2017 init_sockaddrs_family(family, &dst.sa, &mask.sa);
2018 export_rtaddrs(rt, &dst.sa, &mask.sa);
2019
2020 bzero((caddr_t)&info, sizeof(info));
2021 info.rti_info[RTAX_DST] = &dst.sa;
2022 info.rti_info[RTAX_NETMASK] = &mask.sa;
2023 info.rti_info[RTAX_GATEWAY] = &nh->gw_sa;
2024 info.rti_flags = rt->rte_flags | nhop_get_rtflags(nh);
2025 info.rti_ifp = nh->nh_ifp;
2026
2027 return (rtsock_routemsg_info(cmd, &info, fibnum));
2028 }
2029
2030 int
2031 rtsock_routemsg_info(int cmd, struct rt_addrinfo *info, int fibnum)
2032 {
2033 struct rt_msghdr *rtm;
2034 struct sockaddr *sa;
2035 struct mbuf *m;
2036
2037 if (V_route_cb.any_count == 0)
2038 return (0);
2039
2040 if (info->rti_flags & RTF_HOST)
2041 info->rti_info[RTAX_NETMASK] = NULL;
2042
2043 m = rtsock_msg_mbuf(cmd, info);
2044 if (m == NULL)
2045 return (ENOBUFS);
2046
2047 if (fibnum != RT_ALL_FIBS) {
2048 KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: fibnum out "
2049 "of range 0 <= %d < %d", __func__, fibnum, rt_numfibs));
2050 M_SETFIB(m, fibnum);
2051 m->m_flags |= RTS_FILTER_FIB;
2052 }
2053
2054 rtm = mtod(m, struct rt_msghdr *);
2055 rtm->rtm_addrs = info->rti_addrs;
2056 if (info->rti_ifp != NULL)
2057 rtm->rtm_index = info->rti_ifp->if_index;
2058 /* Add RTF_DONE to indicate command 'completion' required by API */
2059 info->rti_flags |= RTF_DONE;
2060 /* Reported routes has to be up */
2061 if (cmd == RTM_ADD || cmd == RTM_CHANGE)
2062 info->rti_flags |= RTF_UP;
2063 rtm->rtm_flags = info->rti_flags;
2064
2065 sa = info->rti_info[RTAX_DST];
2066 rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
2067
2068 return (0);
2069 }
2070
2071 /*
2072 * This is the analogue to the rt_newaddrmsg which performs the same
2073 * function but for multicast group memberhips. This is easier since
2074 * there is no route state to worry about.
2075 */
2076 void
2077 rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma)
2078 {
2079 struct rt_addrinfo info;
2080 struct mbuf *m = NULL;
2081 struct ifnet *ifp = ifma->ifma_ifp;
2082 struct ifma_msghdr *ifmam;
2083
2084 if (V_route_cb.any_count == 0)
2085 return;
2086
2087 bzero((caddr_t)&info, sizeof(info));
2088 info.rti_info[RTAX_IFA] = ifma->ifma_addr;
2089 if (ifp && ifp->if_addr)
2090 info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr;
2091 else
2092 info.rti_info[RTAX_IFP] = NULL;
2093 /*
2094 * If a link-layer address is present, present it as a ``gateway''
2095 * (similarly to how ARP entries, e.g., are presented).
2096 */
2097 info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr;
2098 m = rtsock_msg_mbuf(cmd, &info);
2099 if (m == NULL)
2100 return;
2101 ifmam = mtod(m, struct ifma_msghdr *);
2102 KASSERT(ifp != NULL, ("%s: link-layer multicast address w/o ifp\n",
2103 __func__));
2104 ifmam->ifmam_index = ifp->if_index;
2105 ifmam->ifmam_addrs = info.rti_addrs;
2106 rt_dispatch(m, ifma->ifma_addr ? ifma->ifma_addr->sa_family : AF_UNSPEC);
2107 }
2108
2109 static struct mbuf *
2110 rt_makeifannouncemsg(struct ifnet *ifp, int type, int what,
2111 struct rt_addrinfo *info)
2112 {
2113 struct if_announcemsghdr *ifan;
2114 struct mbuf *m;
2115
2116 if (V_route_cb.any_count == 0)
2117 return NULL;
2118 bzero((caddr_t)info, sizeof(*info));
2119 m = rtsock_msg_mbuf(type, info);
2120 if (m != NULL) {
2121 ifan = mtod(m, struct if_announcemsghdr *);
2122 ifan->ifan_index = ifp->if_index;
2123 strlcpy(ifan->ifan_name, ifp->if_xname,
2124 sizeof(ifan->ifan_name));
2125 ifan->ifan_what = what;
2126 }
2127 return m;
2128 }
2129
2130 /*
2131 * This is called to generate routing socket messages indicating
2132 * IEEE80211 wireless events.
2133 * XXX we piggyback on the RTM_IFANNOUNCE msg format in a clumsy way.
2134 */
2135 void
2136 rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len)
2137 {
2138 struct mbuf *m;
2139 struct rt_addrinfo info;
2140
2141 m = rt_makeifannouncemsg(ifp, RTM_IEEE80211, what, &info);
2142 if (m != NULL) {
2143 /*
2144 * Append the ieee80211 data. Try to stick it in the
2145 * mbuf containing the ifannounce msg; otherwise allocate
2146 * a new mbuf and append.
2147 *
2148 * NB: we assume m is a single mbuf.
2149 */
2150 if (data_len > M_TRAILINGSPACE(m)) {
2151 struct mbuf *n = m_get(M_NOWAIT, MT_DATA);
2152 if (n == NULL) {
2153 m_freem(m);
2154 return;
2155 }
2156 bcopy(data, mtod(n, void *), data_len);
2157 n->m_len = data_len;
2158 m->m_next = n;
2159 } else if (data_len > 0) {
2160 bcopy(data, mtod(m, u_int8_t *) + m->m_len, data_len);
2161 m->m_len += data_len;
2162 }
2163 if (m->m_flags & M_PKTHDR)
2164 m->m_pkthdr.len += data_len;
2165 mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len;
2166 rt_dispatch(m, AF_UNSPEC);
2167 }
2168 }
2169
2170 /*
2171 * This is called to generate routing socket messages indicating
2172 * network interface arrival and departure.
2173 */
2174 static void
2175 rt_ifannouncemsg(struct ifnet *ifp, int what)
2176 {
2177 struct mbuf *m;
2178 struct rt_addrinfo info;
2179
2180 m = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &info);
2181 if (m != NULL)
2182 rt_dispatch(m, AF_UNSPEC);
2183 }
2184
2185 static void
2186 rt_dispatch(struct mbuf *m, sa_family_t saf)
2187 {
2188
2189 M_ASSERTPKTHDR(m);
2190
2191 m->m_rtsock_family = saf;
2192 if (V_loif)
2193 m->m_pkthdr.rcvif = V_loif;
2194 else {
2195 m_freem(m);
2196 return;
2197 }
2198 netisr_queue(NETISR_ROUTE, m); /* mbuf is free'd on failure. */
2199 }
2200
/*
 * Checks if a route can be exported w.r.t. jails/vnets.
 *
 * Host routes (@rt_is_host) are checked against @rt_dst with
 * prison_if(); all other routes are refused when jailed_without_vnet()
 * reports true for @td_ucred.
 *
 * Returns true if the route can be exported, false otherwise.
 */
static bool
can_export_rte(struct ucred *td_ucred, bool rt_is_host,
    const struct sockaddr *rt_dst)
{

	if (rt_is_host)
		return (prison_if(td_ucred, rt_dst) == 0);

	return (!jailed_without_vnet(td_ucred));
}
2216
2217
2218 /*
2219 * This is used in dumping the kernel table via sysctl().
2220 */
2221 static int
2222 sysctl_dumpentry(struct rtentry *rt, void *vw)
2223 {
2224 struct walkarg *w = vw;
2225 struct nhop_object *nh;
2226
2227 NET_EPOCH_ASSERT();
2228
2229 export_rtaddrs(rt, w->dst, w->mask);
2230 if (!can_export_rte(w->w_req->td->td_ucred, rt_is_host(rt), w->dst))
2231 return (0);
2232 nh = rt_get_raw_nhop(rt);
2233 #ifdef ROUTE_MPATH
2234 if (NH_IS_NHGRP(nh)) {
2235 const struct weightened_nhop *wn;
2236 uint32_t num_nhops;
2237 int error;
2238 wn = nhgrp_get_nhops((struct nhgrp_object *)nh, &num_nhops);
2239 for (int i = 0; i < num_nhops; i++) {
2240 error = sysctl_dumpnhop(rt, wn[i].nh, wn[i].weight, w);
2241 if (error != 0)
2242 return (error);
2243 }
2244 } else
2245 #endif
2246 sysctl_dumpnhop(rt, nh, rt->rt_weight, w);
2247
2248 return (0);
2249 }
2250
2251
2252 static int
2253 sysctl_dumpnhop(struct rtentry *rt, struct nhop_object *nh, uint32_t weight,
2254 struct walkarg *w)
2255 {
2256 struct rt_addrinfo info;
2257 int error = 0, size;
2258 uint32_t rtflags;
2259
2260 rtflags = nhop_get_rtflags(nh);
2261
2262 if (w->w_op == NET_RT_FLAGS && !(rtflags & w->w_arg))
2263 return (0);
2264
2265 bzero((caddr_t)&info, sizeof(info));
2266 info.rti_info[RTAX_DST] = w->dst;
2267 info.rti_info[RTAX_GATEWAY] = &nh->gw_sa;
2268 info.rti_info[RTAX_NETMASK] = (rtflags & RTF_HOST) ? NULL : w->mask;
2269 info.rti_info[RTAX_GENMASK] = 0;
2270 if (nh->nh_ifp && !(nh->nh_ifp->if_flags & IFF_DYING)) {
2271 info.rti_info[RTAX_IFP] = nh->nh_ifp->if_addr->ifa_addr;
2272 info.rti_info[RTAX_IFA] = nh->nh_ifa->ifa_addr;
2273 if (nh->nh_ifp->if_flags & IFF_POINTOPOINT)
2274 info.rti_info[RTAX_BRD] = nh->nh_ifa->ifa_dstaddr;
2275 }
2276 if ((error = rtsock_msg_buffer(RTM_GET, &info, w, &size)) != 0)
2277 return (error);
2278 if (w->w_req && w->w_tmem) {
2279 struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
2280
2281 bzero(&rtm->rtm_index,
2282 sizeof(*rtm) - offsetof(struct rt_msghdr, rtm_index));
2283
2284 /*
2285 * rte flags may consist of RTF_HOST (duplicated in nhop rtflags)
2286 * and RTF_UP (if entry is linked, which is always true here).
2287 * Given that, use nhop rtflags & add RTF_UP.
2288 */
2289 rtm->rtm_flags = rtflags | RTF_UP;
2290 if (rtm->rtm_flags & RTF_GWFLAG_COMPAT)
2291 rtm->rtm_flags = RTF_GATEWAY |
2292 (rtm->rtm_flags & ~RTF_GWFLAG_COMPAT);
2293 rt_getmetrics(rt, nh, &rtm->rtm_rmx);
2294 rtm->rtm_rmx.rmx_weight = weight;
2295 rtm->rtm_index = nh->nh_ifp->if_index;
2296 rtm->rtm_addrs = info.rti_addrs;
2297 error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
2298 return (error);
2299 }
2300 return (error);
2301 }
2302
2303 static int
2304 sysctl_iflist_ifml(struct ifnet *ifp, const struct if_data *src_ifd,
2305 struct rt_addrinfo *info, struct walkarg *w, int len)
2306 {
2307 struct if_msghdrl *ifm;
2308 struct if_data *ifd;
2309
2310 ifm = (struct if_msghdrl *)w->w_tmem;
2311
2312 #ifdef COMPAT_FREEBSD32
2313 if (w->w_req->flags & SCTL_MASK32) {
2314 struct if_msghdrl32 *ifm32;
2315
2316 ifm32 = (struct if_msghdrl32 *)ifm;
2317 ifm32->ifm_addrs = info->rti_addrs;
2318 ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
2319 ifm32->ifm_index = ifp->if_index;
2320 ifm32->_ifm_spare1 = 0;
2321 ifm32->ifm_len = sizeof(*ifm32);
2322 ifm32->ifm_data_off = offsetof(struct if_msghdrl32, ifm_data);
2323 ifm32->_ifm_spare2 = 0;
2324 ifd = &ifm32->ifm_data;
2325 } else
2326 #endif
2327 {
2328 ifm->ifm_addrs = info->rti_addrs;
2329 ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
2330 ifm->ifm_index = ifp->if_index;
2331 ifm->_ifm_spare1 = 0;
2332 ifm->ifm_len = sizeof(*ifm);
2333 ifm->ifm_data_off = offsetof(struct if_msghdrl, ifm_data);
2334 ifm->_ifm_spare2 = 0;
2335 ifd = &ifm->ifm_data;
2336 }
2337
2338 memcpy(ifd, src_ifd, sizeof(*ifd));
2339
2340 return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len));
2341 }
2342
2343 static int
2344 sysctl_iflist_ifm(struct ifnet *ifp, const struct if_data *src_ifd,
2345 struct rt_addrinfo *info, struct walkarg *w, int len)
2346 {
2347 struct if_msghdr *ifm;
2348 struct if_data *ifd;
2349
2350 ifm = (struct if_msghdr *)w->w_tmem;
2351
2352 #ifdef COMPAT_FREEBSD32
2353 if (w->w_req->flags & SCTL_MASK32) {
2354 struct if_msghdr32 *ifm32;
2355
2356 ifm32 = (struct if_msghdr32 *)ifm;
2357 ifm32->ifm_addrs = info->rti_addrs;
2358 ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
2359 ifm32->ifm_index = ifp->if_index;
2360 ifm32->_ifm_spare1 = 0;
2361 ifd = &ifm32->ifm_data;
2362 } else
2363 #endif
2364 {
2365 ifm->ifm_addrs = info->rti_addrs;
2366 ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
2367 ifm->ifm_index = ifp->if_index;
2368 ifm->_ifm_spare1 = 0;
2369 ifd = &ifm->ifm_data;
2370 }
2371
2372 memcpy(ifd, src_ifd, sizeof(*ifd));
2373
2374 return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len));
2375 }
2376
2377 static int
2378 sysctl_iflist_ifaml(struct ifaddr *ifa, struct rt_addrinfo *info,
2379 struct walkarg *w, int len)
2380 {
2381 struct ifa_msghdrl *ifam;
2382 struct if_data *ifd;
2383
2384 ifam = (struct ifa_msghdrl *)w->w_tmem;
2385
2386 #ifdef COMPAT_FREEBSD32
2387 if (w->w_req->flags & SCTL_MASK32) {
2388 struct ifa_msghdrl32 *ifam32;
2389
2390 ifam32 = (struct ifa_msghdrl32 *)ifam;
2391 ifam32->ifam_addrs = info->rti_addrs;
2392 ifam32->ifam_flags = ifa->ifa_flags;
2393 ifam32->ifam_index = ifa->ifa_ifp->if_index;
2394 ifam32->_ifam_spare1 = 0;
2395 ifam32->ifam_len = sizeof(*ifam32);
2396 ifam32->ifam_data_off =
2397 offsetof(struct ifa_msghdrl32, ifam_data);
2398 ifam32->ifam_metric = ifa->ifa_ifp->if_metric;
2399 ifd = &ifam32->ifam_data;
2400 } else
2401 #endif
2402 {
2403 ifam->ifam_addrs = info->rti_addrs;
2404 ifam->ifam_flags = ifa->ifa_flags;
2405 ifam->ifam_index = ifa->ifa_ifp->if_index;
2406 ifam->_ifam_spare1 = 0;
2407 ifam->ifam_len = sizeof(*ifam);
2408 ifam->ifam_data_off = offsetof(struct ifa_msghdrl, ifam_data);
2409 ifam->ifam_metric = ifa->ifa_ifp->if_metric;
2410 ifd = &ifam->ifam_data;
2411 }
2412
2413 bzero(ifd, sizeof(*ifd));
2414 ifd->ifi_datalen = sizeof(struct if_data);
2415 ifd->ifi_ipackets = counter_u64_fetch(ifa->ifa_ipackets);
2416 ifd->ifi_opackets = counter_u64_fetch(ifa->ifa_opackets);
2417 ifd->ifi_ibytes = counter_u64_fetch(ifa->ifa_ibytes);
2418 ifd->ifi_obytes = counter_u64_fetch(ifa->ifa_obytes);
2419
2420 /* Fixup if_data carp(4) vhid. */
2421 if (carp_get_vhid_p != NULL)
2422 ifd->ifi_vhid = (*carp_get_vhid_p)(ifa);
2423
2424 return (SYSCTL_OUT(w->w_req, w->w_tmem, len));
2425 }
2426
2427 static int
2428 sysctl_iflist_ifam(struct ifaddr *ifa, struct rt_addrinfo *info,
2429 struct walkarg *w, int len)
2430 {
2431 struct ifa_msghdr *ifam;
2432
2433 ifam = (struct ifa_msghdr *)w->w_tmem;
2434 ifam->ifam_addrs = info->rti_addrs;
2435 ifam->ifam_flags = ifa->ifa_flags;
2436 ifam->ifam_index = ifa->ifa_ifp->if_index;
2437 ifam->_ifam_spare1 = 0;
2438 ifam->ifam_metric = ifa->ifa_ifp->if_metric;
2439
2440 return (SYSCTL_OUT(w->w_req, w->w_tmem, len));
2441 }
2442
2443 static int
2444 sysctl_iflist(int af, struct walkarg *w)
2445 {
2446 struct ifnet *ifp;
2447 struct ifaddr *ifa;
2448 struct if_data ifd;
2449 struct rt_addrinfo info;
2450 int len, error = 0;
2451 struct sockaddr_storage ss;
2452
2453 bzero((caddr_t)&info, sizeof(info));
2454 bzero(&ifd, sizeof(ifd));
2455 CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
2456 if (w->w_arg && w->w_arg != ifp->if_index)
2457 continue;
2458 if_data_copy(ifp, &ifd);
2459 ifa = ifp->if_addr;
2460 info.rti_info[RTAX_IFP] = ifa->ifa_addr;
2461 error = rtsock_msg_buffer(RTM_IFINFO, &info, w, &len);
2462 if (error != 0)
2463 goto done;
2464 info.rti_info[RTAX_IFP] = NULL;
2465 if (w->w_req && w->w_tmem) {
2466 if (w->w_op == NET_RT_IFLISTL)
2467 error = sysctl_iflist_ifml(ifp, &ifd, &info, w,
2468 len);
2469 else
2470 error = sysctl_iflist_ifm(ifp, &ifd, &info, w,
2471 len);
2472 if (error)
2473 goto done;
2474 }
2475 while ((ifa = CK_STAILQ_NEXT(ifa, ifa_link)) != NULL) {
2476 if (af && af != ifa->ifa_addr->sa_family)
2477 continue;
2478 if (prison_if(w->w_req->td->td_ucred,
2479 ifa->ifa_addr) != 0)
2480 continue;
2481 info.rti_info[RTAX_IFA] = ifa->ifa_addr;
2482 info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(
2483 ifa->ifa_addr, ifa->ifa_netmask, &ss);
2484 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
2485 error = rtsock_msg_buffer(RTM_NEWADDR, &info, w, &len);
2486 if (error != 0)
2487 goto done;
2488 if (w->w_req && w->w_tmem) {
2489 if (w->w_op == NET_RT_IFLISTL)
2490 error = sysctl_iflist_ifaml(ifa, &info,
2491 w, len);
2492 else
2493 error = sysctl_iflist_ifam(ifa, &info,
2494 w, len);
2495 if (error)
2496 goto done;
2497 }
2498 }
2499 info.rti_info[RTAX_IFA] = NULL;
2500 info.rti_info[RTAX_NETMASK] = NULL;
2501 info.rti_info[RTAX_BRD] = NULL;
2502 }
2503 done:
2504 return (error);
2505 }
2506
2507 static int
2508 sysctl_ifmalist(int af, struct walkarg *w)
2509 {
2510 struct rt_addrinfo info;
2511 struct ifaddr *ifa;
2512 struct ifmultiaddr *ifma;
2513 struct ifnet *ifp;
2514 int error, len;
2515
2516 NET_EPOCH_ASSERT();
2517
2518 error = 0;
2519 bzero((caddr_t)&info, sizeof(info));
2520
2521 CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
2522 if (w->w_arg && w->w_arg != ifp->if_index)
2523 continue;
2524 ifa = ifp->if_addr;
2525 info.rti_info[RTAX_IFP] = ifa ? ifa->ifa_addr : NULL;
2526 CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2527 if (af && af != ifma->ifma_addr->sa_family)
2528 continue;
2529 if (prison_if(w->w_req->td->td_ucred,
2530 ifma->ifma_addr) != 0)
2531 continue;
2532 info.rti_info[RTAX_IFA] = ifma->ifma_addr;
2533 info.rti_info[RTAX_GATEWAY] =
2534 (ifma->ifma_addr->sa_family != AF_LINK) ?
2535 ifma->ifma_lladdr : NULL;
2536 error = rtsock_msg_buffer(RTM_NEWMADDR, &info, w, &len);
2537 if (error != 0)
2538 break;
2539 if (w->w_req && w->w_tmem) {
2540 struct ifma_msghdr *ifmam;
2541
2542 ifmam = (struct ifma_msghdr *)w->w_tmem;
2543 ifmam->ifmam_index = ifma->ifma_ifp->if_index;
2544 ifmam->ifmam_flags = 0;
2545 ifmam->ifmam_addrs = info.rti_addrs;
2546 ifmam->_ifmam_spare1 = 0;
2547 error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
2548 if (error != 0)
2549 break;
2550 }
2551 }
2552 if (error != 0)
2553 break;
2554 }
2555 return (error);
2556 }
2557
2558 static void
2559 rtable_sysctl_dump(uint32_t fibnum, int family, struct walkarg *w)
2560 {
2561 union sockaddr_union sa_dst, sa_mask;
2562
2563 w->family = family;
2564 w->dst = (struct sockaddr *)&sa_dst;
2565 w->mask = (struct sockaddr *)&sa_mask;
2566
2567 init_sockaddrs_family(family, w->dst, w->mask);
2568
2569 rib_walk(fibnum, family, false, sysctl_dumpentry, w);
2570 }
2571
2572 static int
2573 sysctl_rtsock(SYSCTL_HANDLER_ARGS)
2574 {
2575 struct epoch_tracker et;
2576 int *name = (int *)arg1;
2577 u_int namelen = arg2;
2578 struct rib_head *rnh = NULL; /* silence compiler. */
2579 int i, lim, error = EINVAL;
2580 int fib = 0;
2581 u_char af;
2582 struct walkarg w;
2583
2584 if (namelen < 3)
2585 return (EINVAL);
2586
2587 name++;
2588 namelen--;
2589 if (req->newptr)
2590 return (EPERM);
2591 if (name[1] == NET_RT_DUMP || name[1] == NET_RT_NHOP || name[1] == NET_RT_NHGRP) {
2592 if (namelen == 3)
2593 fib = req->td->td_proc->p_fibnum;
2594 else if (namelen == 4)
2595 fib = (name[3] == RT_ALL_FIBS) ?
2596 req->td->td_proc->p_fibnum : name[3];
2597 else
2598 return ((namelen < 3) ? EISDIR : ENOTDIR);
2599 if (fib < 0 || fib >= rt_numfibs)
2600 return (EINVAL);
2601 } else if (namelen != 3)
2602 return ((namelen < 3) ? EISDIR : ENOTDIR);
2603 af = name[0];
2604 if (af > AF_MAX)
2605 return (EINVAL);
2606 bzero(&w, sizeof(w));
2607 w.w_op = name[1];
2608 w.w_arg = name[2];
2609 w.w_req = req;
2610
2611 error = sysctl_wire_old_buffer(req, 0);
2612 if (error)
2613 return (error);
2614
2615 /*
2616 * Allocate reply buffer in advance.
2617 * All rtsock messages has maximum length of u_short.
2618 */
2619 w.w_tmemsize = 65536;
2620 w.w_tmem = malloc(w.w_tmemsize, M_TEMP, M_WAITOK);
2621
2622 NET_EPOCH_ENTER(et);
2623 switch (w.w_op) {
2624 case NET_RT_DUMP:
2625 case NET_RT_FLAGS:
2626 if (af == 0) { /* dump all tables */
2627 i = 1;
2628 lim = AF_MAX;
2629 } else /* dump only one table */
2630 i = lim = af;
2631
2632 /*
2633 * take care of llinfo entries, the caller must
2634 * specify an AF
2635 */
2636 if (w.w_op == NET_RT_FLAGS &&
2637 (w.w_arg == 0 || w.w_arg & RTF_LLINFO)) {
2638 if (af != 0)
2639 error = lltable_sysctl_dumparp(af, w.w_req);
2640 else
2641 error = EINVAL;
2642 break;
2643 }
2644 /*
2645 * take care of routing entries
2646 */
2647 for (error = 0; error == 0 && i <= lim; i++) {
2648 rnh = rt_tables_get_rnh(fib, i);
2649 if (rnh != NULL) {
2650 rtable_sysctl_dump(fib, i, &w);
2651 } else if (af != 0)
2652 error = EAFNOSUPPORT;
2653 }
2654 break;
2655 case NET_RT_NHOP:
2656 case NET_RT_NHGRP:
2657 /* Allow dumping one specific af/fib at a time */
2658 if (namelen < 4) {
2659 error = EINVAL;
2660 break;
2661 }
2662 fib = name[3];
2663 if (fib < 0 || fib > rt_numfibs) {
2664 error = EINVAL;
2665 break;
2666 }
2667 rnh = rt_tables_get_rnh(fib, af);
2668 if (rnh == NULL) {
2669 error = EAFNOSUPPORT;
2670 break;
2671 }
2672 if (w.w_op == NET_RT_NHOP)
2673 error = nhops_dump_sysctl(rnh, w.w_req);
2674 else
2675 #ifdef ROUTE_MPATH
2676 error = nhgrp_dump_sysctl(rnh, w.w_req);
2677 #else
2678 error = ENOTSUP;
2679 #endif
2680 break;
2681 case NET_RT_IFLIST:
2682 case NET_RT_IFLISTL:
2683 error = sysctl_iflist(af, &w);
2684 break;
2685
2686 case NET_RT_IFMALIST:
2687 error = sysctl_ifmalist(af, &w);
2688 break;
2689 }
2690 NET_EPOCH_EXIT(et);
2691
2692 free(w.w_tmem, M_TEMP);
2693 return (error);
2694 }
2695
2696 static SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD | CTLFLAG_MPSAFE,
2697 sysctl_rtsock, "Return route tables and interface/address lists");
2698
2699 /*
2700 * Definitions of protocols supported in the ROUTE domain.
2701 */
2702
2703 static struct domain routedomain; /* or at least forward */
2704
2705 static struct protosw routesw = {
2706 .pr_type = SOCK_RAW,
2707 .pr_flags = PR_ATOMIC|PR_ADDR,
2708 .pr_abort = rts_close,
2709 .pr_attach = rts_attach,
2710 .pr_detach = rts_detach,
2711 .pr_send = rts_send,
2712 .pr_shutdown = rts_shutdown,
2713 .pr_disconnect = rts_disconnect,
2714 .pr_close = rts_close,
2715 };
2716
2717 static struct domain routedomain = {
2718 .dom_family = PF_ROUTE,
2719 .dom_name = "route",
2720 .dom_nprotosw = 1,
2721 .dom_protosw = { &routesw },
2722 };
2723
2724 DOMAIN_SET(route);
Cache object: 721e4c534aa1b0742a513b6a1f5524cf
|