FreeBSD/Linux Kernel Cross Reference
sys/net/rtsock.c
1 /* $OpenBSD: rtsock.c,v 1.359 2023/01/22 12:05:44 mvs Exp $ */
2 /* $NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $ */
3
4 /*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright (c) 1988, 1991, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)rtsock.c 8.6 (Berkeley) 2/11/95
62 */
63
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/proc.h>
67 #include <sys/sysctl.h>
68 #include <sys/mbuf.h>
69 #include <sys/socket.h>
70 #include <sys/socketvar.h>
71 #include <sys/domain.h>
72 #include <sys/pool.h>
73 #include <sys/protosw.h>
74 #include <sys/srp.h>
75
76 #include <net/if.h>
77 #include <net/if_dl.h>
78 #include <net/if_var.h>
79 #include <net/route.h>
80
81 #include <netinet/in.h>
82
83 #ifdef MPLS
84 #include <netmpls/mpls.h>
85 #endif
86 #ifdef IPSEC
87 #include <netinet/ip_ipsp.h>
88 #include <net/if_enc.h>
89 #endif
90 #ifdef BFD
91 #include <net/bfd.h>
92 #endif
93
94 #include <sys/stdarg.h>
95 #include <sys/kernel.h>
96 #include <sys/timeout.h>
97
/* Send and receive buffer sizes passed to soreserve() in route_attach(). */
#define ROUTESNDQ	8192
#define ROUTERCVQ	8192

/*
 * Source address handed to sbappendaddr() for every message queued on a
 * routing socket; route_peeraddr() also returns it as the peer address.
 */
const struct sockaddr route_src = { 2, PF_ROUTE, };
102
/*
 * Argument block taken by rtm_msg2(), sysctl_iflist() and
 * sysctl_ifnames().
 * NOTE(review): the code that fills and reads these fields is not in
 * this part of the file; the per-field notes are inferred from the
 * names only -- confirm against the sysctl handlers.
 */
struct walkarg {
	/* presumably: sysctl operation, its argument, size of w_tmem */
	int	w_op, w_arg, w_tmemsize;
	/* presumably: bytes supplied by the caller, bytes required */
	size_t	w_given, w_needed;
	/* presumably: output position and temporary buffer */
	caddr_t	w_where, w_tmem;
};
108
/* Routing socket setup, protocol entry points and message delivery. */
void	route_prinit(void);
void	rcb_ref(void *, void *);
void	rcb_unref(void *, void *);
int	route_output(struct mbuf *, struct socket *);
int	route_ctloutput(int, struct socket *, int, int, struct mbuf *);
int	route_attach(struct socket *, int, int);
int	route_detach(struct socket *);
int	route_disconnect(struct socket *);
int	route_shutdown(struct socket *);
void	route_rcvd(struct socket *);
int	route_send(struct socket *, struct mbuf *, struct mbuf *,
	    struct mbuf *);
int	route_sockaddr(struct socket *, struct mbuf *);
int	route_peeraddr(struct socket *, struct mbuf *);
void	route_input(struct mbuf *m0, struct socket *, sa_family_t);
int	route_arp_conflict(struct rtentry *, struct rt_addrinfo *);
int	route_cleargateway(struct rtentry *, void *, unsigned int);
void	rtm_senddesync_timer(void *);
void	rtm_senddesync(struct socket *);
int	rtm_sendup(struct socket *, struct mbuf *);

/* Routing message construction, parsing and request handling. */
int	rtm_getifa(struct rt_addrinfo *, unsigned int);
int	rtm_output(struct rt_msghdr *, struct rtentry **, struct rt_addrinfo *,
	    uint8_t, unsigned int);
struct rt_msghdr *rtm_report(struct rtentry *, u_char, int, int);
struct mbuf	*rtm_msg1(int, struct rt_addrinfo *);
int	rtm_msg2(int, int, struct rt_addrinfo *, caddr_t,
	    struct walkarg *);
int	rtm_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *);
int	rtm_validate_proposal(struct rt_addrinfo *);
void	rtm_setmetrics(u_long, const struct rt_metrics *,
	    struct rt_kmetrics *);
void	rtm_getmetrics(const struct rtentry *,
	    struct rt_metrics *);

/* sysctl helpers. */
int	sysctl_iflist(int, struct walkarg *);
int	sysctl_ifnames(struct walkarg *);
int	sysctl_rtable_rtstat(void *, size_t *, void *);

/* Used by route_output() to handle RTM_SOURCE. */
int	rt_setsource(unsigned int, struct sockaddr *);
149
/*
 * Per-socket control block of a routing socket; allocated from
 * rtpcb_pool in route_attach() and released in route_detach().
 *
 * Locks used to protect struct members
 *	I	immutable after creation
 *	s	solock
 */
struct rtpcb {
	struct socket		*rop_socket;		/* [I] owning socket */

	SRPL_ENTRY(rtpcb)	rop_list;	/* linkage on rtptable.rtp_list */
	struct refcnt		rop_refcnt;	/* taken/released by rcb_ref()/rcb_unref() */
	struct timeout		rop_timeout;	/* runs rtm_senddesync_timer() */
	unsigned int		rop_msgfilter;		/* [s] ROUTE_MSGFILTER value */
	unsigned int		rop_flagfilter;		/* [s] ROUTE_FLAGFILTER value */
	unsigned int		rop_flags;		/* [s] ROUTECB_FLAG_* */
	u_int			rop_rtableid;		/* [s] ROUTE_TABLEFILTER value */
	unsigned short		rop_proto;		/* [I] proto given to route_attach() */
	u_char			rop_priority;		/* [s] ROUTE_PRIOFILTER value */
};
/* Fetch the routing control block stored in a socket's so_pcb. */
#define	sotortpcb(so)	((struct rtpcb *)(so)->so_pcb)
169
/*
 * Global registry of attached routing sockets. route_input() walks
 * rtp_list with SRPL_FOREACH(); route_attach() and route_detach() modify
 * the list and rtp_count while holding rtp_lk for writing.
 */
struct rtptable {
	SRPL_HEAD(, rtpcb)	rtp_list;	/* all attached rtpcbs */
	struct srpl_rc		rtp_rc;		/* ref callbacks: rcb_ref()/rcb_unref() */
	struct rwlock		rtp_lk;		/* held for list updates */
	unsigned int		rtp_count;	/* number of rtpcbs on rtp_list */
};

/* Allocator for struct rtpcb, initialised in route_prinit(). */
struct pool rtpcb_pool;
/* The single registry instance. */
struct rtptable rtptable;
179
/*
 * These flags and timeout are used for indicating to userland (via a
 * RTM_DESYNC msg) when the route socket has overflowed and messages
 * have been lost.
 *
 * Both flags are set by rtm_sendup() when a message cannot be queued.
 * DESYNC is cleared by rtm_senddesync() once the RTM_DESYNC message has
 * been appended; FLUSH is cleared by route_rcvd() when the receive
 * buffer is empty again.
 */
#define ROUTECB_FLAG_DESYNC	0x1	/* Route socket out of memory */
#define ROUTECB_FLAG_FLUSH	0x2	/* Wait until socket is empty before
					   queueing more packets */

/* Delay before rtm_senddesync() is retried by the rop_timeout timer. */
#define ROUTE_DESYNC_RESEND_TIMEOUT	200	/* In ms */
190
191 void
192 route_prinit(void)
193 {
194 srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL);
195 rw_init(&rtptable.rtp_lk, "rtsock");
196 SRPL_INIT(&rtptable.rtp_list);
197 pool_init(&rtpcb_pool, sizeof(struct rtpcb), 0,
198 IPL_SOFTNET, PR_WAITOK, "rtpcb", NULL);
199 }
200
201 void
202 rcb_ref(void *null, void *v)
203 {
204 struct rtpcb *rop = v;
205
206 refcnt_take(&rop->rop_refcnt);
207 }
208
209 void
210 rcb_unref(void *null, void *v)
211 {
212 struct rtpcb *rop = v;
213
214 refcnt_rele_wake(&rop->rop_refcnt);
215 }
216
217 int
218 route_attach(struct socket *so, int proto, int wait)
219 {
220 struct rtpcb *rop;
221 int error;
222
223 error = soreserve(so, ROUTESNDQ, ROUTERCVQ);
224 if (error)
225 return (error);
226 /*
227 * use the rawcb but allocate a rtpcb, this
228 * code does not care about the additional fields
229 * and works directly on the raw socket.
230 */
231 rop = pool_get(&rtpcb_pool, (wait == M_WAIT ? PR_WAITOK : PR_NOWAIT) |
232 PR_ZERO);
233 if (rop == NULL)
234 return (ENOBUFS);
235 so->so_pcb = rop;
236 /* Init the timeout structure */
237 timeout_set_proc(&rop->rop_timeout, rtm_senddesync_timer, so);
238 refcnt_init(&rop->rop_refcnt);
239
240 rop->rop_socket = so;
241 rop->rop_proto = proto;
242
243 rop->rop_rtableid = curproc->p_p->ps_rtableid;
244
245 soisconnected(so);
246 so->so_options |= SO_USELOOPBACK;
247
248 rw_enter(&rtptable.rtp_lk, RW_WRITE);
249 SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop,
250 rop_list);
251 rtptable.rtp_count++;
252 rw_exit(&rtptable.rtp_lk);
253
254 return (0);
255 }
256
/*
 * Tear down the routing control block attached to `so'.
 *
 * Called with the socket lock held. The lock is released while waiting
 * for outstanding references and the desync timeout, and is held again
 * on return.
 * Returns EINVAL if the socket has no control block, otherwise 0.
 */
int
route_detach(struct socket *so)
{
	struct rtpcb *rop;

	soassertlocked(so);

	rop = sotortpcb(so);
	if (rop == NULL)
		return (EINVAL);

	/* Unlink from the global list that route_input() iterates. */
	rw_enter(&rtptable.rtp_lk, RW_WRITE);

	rtptable.rtp_count--;
	SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb,
	    rop_list);
	rw_exit(&rtptable.rtp_lk);

	/*
	 * route_input() and rtm_senddesync_timer() take the socket lock
	 * while using this control block, so release it before waiting
	 * for them.
	 */
	sounlock(so);

	/* wait for all references to drop */
	refcnt_finalize(&rop->rop_refcnt, "rtsockrefs");
	timeout_del_barrier(&rop->rop_timeout);

	solock(so);

	so->so_pcb = NULL;
	KASSERT((so->so_state & SS_NOFDREF) == 0);
	pool_put(&rtpcb_pool, rop);

	return (0);
}
289
/*
 * Disconnect request: mark the socket disconnected via
 * soisdisconnected(). Always returns 0.
 */
int
route_disconnect(struct socket *so)
{
	soisdisconnected(so);
	return (0);
}
296
/*
 * Shutdown request: flag the socket via socantsendmore(). Always
 * returns 0.
 */
int
route_shutdown(struct socket *so)
{
	socantsendmore(so);
	return (0);
}
303
304 void
305 route_rcvd(struct socket *so)
306 {
307 struct rtpcb *rop = sotortpcb(so);
308
309 soassertlocked(so);
310
311 /*
312 * If we are in a FLUSH state, check if the buffer is
313 * empty so that we can clear the flag.
314 */
315 if (((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) &&
316 ((sbspace(rop->rop_socket, &rop->rop_socket->so_rcv) ==
317 rop->rop_socket->so_rcv.sb_hiwat)))
318 rop->rop_flags &= ~ROUTECB_FLAG_FLUSH;
319 }
320
321 int
322 route_send(struct socket *so, struct mbuf *m, struct mbuf *nam,
323 struct mbuf *control)
324 {
325 int error;
326
327 soassertlocked(so);
328
329 if (control && control->m_len) {
330 error = EOPNOTSUPP;
331 goto out;
332 }
333
334 if (nam) {
335 error = EISCONN;
336 goto out;
337 }
338
339 error = route_output(m, so);
340 m = NULL;
341
342 out:
343 m_freem(control);
344 m_freem(m);
345
346 return (error);
347 }
348
/*
 * Local-address request. Routing sockets provide none: both arguments
 * are ignored and EINVAL is always returned.
 */
int
route_sockaddr(struct socket *so, struct mbuf *nam)
{
	return (EINVAL);
}
354
355 int
356 route_peeraddr(struct socket *so, struct mbuf *nam)
357 {
358 /* minimal support, just implement a fake peer address */
359 bcopy(&route_src, mtod(nam, caddr_t), route_src.sa_len);
360 nam->m_len = route_src.sa_len;
361 return (0);
362 }
363
364 int
365 route_ctloutput(int op, struct socket *so, int level, int optname,
366 struct mbuf *m)
367 {
368 struct rtpcb *rop = sotortpcb(so);
369 int error = 0;
370 unsigned int tid, prio;
371
372 if (level != AF_ROUTE)
373 return (EINVAL);
374
375 switch (op) {
376 case PRCO_SETOPT:
377 switch (optname) {
378 case ROUTE_MSGFILTER:
379 if (m == NULL || m->m_len != sizeof(unsigned int))
380 error = EINVAL;
381 else
382 rop->rop_msgfilter = *mtod(m, unsigned int *);
383 break;
384 case ROUTE_TABLEFILTER:
385 if (m == NULL || m->m_len != sizeof(unsigned int)) {
386 error = EINVAL;
387 break;
388 }
389 tid = *mtod(m, unsigned int *);
390 if (tid != RTABLE_ANY && !rtable_exists(tid))
391 error = ENOENT;
392 else
393 rop->rop_rtableid = tid;
394 break;
395 case ROUTE_PRIOFILTER:
396 if (m == NULL || m->m_len != sizeof(unsigned int)) {
397 error = EINVAL;
398 break;
399 }
400 prio = *mtod(m, unsigned int *);
401 if (prio > RTP_MAX)
402 error = EINVAL;
403 else
404 rop->rop_priority = prio;
405 break;
406 case ROUTE_FLAGFILTER:
407 if (m == NULL || m->m_len != sizeof(unsigned int))
408 error = EINVAL;
409 else
410 rop->rop_flagfilter = *mtod(m, unsigned int *);
411 break;
412 default:
413 error = ENOPROTOOPT;
414 break;
415 }
416 break;
417 case PRCO_GETOPT:
418 switch (optname) {
419 case ROUTE_MSGFILTER:
420 m->m_len = sizeof(unsigned int);
421 *mtod(m, unsigned int *) = rop->rop_msgfilter;
422 break;
423 case ROUTE_TABLEFILTER:
424 m->m_len = sizeof(unsigned int);
425 *mtod(m, unsigned int *) = rop->rop_rtableid;
426 break;
427 case ROUTE_PRIOFILTER:
428 m->m_len = sizeof(unsigned int);
429 *mtod(m, unsigned int *) = rop->rop_priority;
430 break;
431 case ROUTE_FLAGFILTER:
432 m->m_len = sizeof(unsigned int);
433 *mtod(m, unsigned int *) = rop->rop_flagfilter;
434 break;
435 default:
436 error = ENOPROTOOPT;
437 break;
438 }
439 }
440 return (error);
441 }
442
/*
 * Timeout handler installed by route_attach() on rop_timeout. `xso' is
 * the socket; rtm_senddesync() is run on it with the socket lock held.
 */
void
rtm_senddesync_timer(void *xso)
{
	struct socket *so = xso;

	solock(so);
	rtm_senddesync(so);
	sounlock(so);
}
452
453 void
454 rtm_senddesync(struct socket *so)
455 {
456 struct rtpcb *rop = sotortpcb(so);
457 struct mbuf *desync_mbuf;
458
459 soassertlocked(so);
460
461 /*
462 * Dying socket is disconnected by upper layer and there is
463 * no reason to send packet. Also we shouldn't reschedule
464 * timeout(9), otherwise timeout_del_barrier(9) can't help us.
465 */
466 if ((so->so_state & SS_ISCONNECTED) == 0 ||
467 (so->so_rcv.sb_state & SS_CANTRCVMORE))
468 return;
469
470 /* If we are in a DESYNC state, try to send a RTM_DESYNC packet */
471 if ((rop->rop_flags & ROUTECB_FLAG_DESYNC) == 0)
472 return;
473
474 /*
475 * If we fail to alloc memory or if sbappendaddr()
476 * fails, re-add timeout and try again.
477 */
478 desync_mbuf = rtm_msg1(RTM_DESYNC, NULL);
479 if (desync_mbuf != NULL) {
480 if (sbappendaddr(so, &so->so_rcv, &route_src,
481 desync_mbuf, NULL) != 0) {
482 rop->rop_flags &= ~ROUTECB_FLAG_DESYNC;
483 sorwakeup(rop->rop_socket);
484 return;
485 }
486 m_freem(desync_mbuf);
487 }
488 /* Re-add timeout to try sending msg again */
489 timeout_add_msec(&rop->rop_timeout, ROUTE_DESYNC_RESEND_TIMEOUT);
490 }
491
/*
 * Deliver routing message `m0' to the routing sockets on
 * rtptable.rtp_list.
 *
 * Each socket is locked in turn and the message is passed to
 * rtm_sendup() (which queues a copy) unless one of the per-socket
 * filters rejects it. `so0' is the socket the message originated from;
 * it is skipped when it does not have SO_USELOOPBACK set. `sa_family'
 * is the address family of the message, or AF_UNSPEC.
 * The original mbuf is always freed before returning.
 */
void
route_input(struct mbuf *m0, struct socket *so0, sa_family_t sa_family)
{
	struct socket *so;
	struct rtpcb *rop;
	struct rt_msghdr *rtm;
	struct mbuf *m = m0;
	struct srp_ref sr;

	/* ensure that we can access the rtm_type via mtod() */
	if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) {
		m_freem(m);
		return;
	}

	SRPL_FOREACH(rop, &sr, &rtptable.rtp_list, rop_list) {
		/*
		 * If route socket is bound to an address family only send
		 * messages that match the address family. Address family
		 * agnostic messages are always sent.
		 */
		if (sa_family != AF_UNSPEC && rop->rop_proto != AF_UNSPEC &&
		    rop->rop_proto != sa_family)
			continue;


		so = rop->rop_socket;
		solock(so);

		/*
		 * Check to see if we don't want our own messages and
		 * if we can receive anything.
		 */
		if ((so0 == so && !(so0->so_options & SO_USELOOPBACK)) ||
		    !(so->so_state & SS_ISCONNECTED) ||
		    (so->so_rcv.sb_state & SS_CANTRCVMORE))
			goto next;

		/* filter messages that the process does not want */
		rtm = mtod(m, struct rt_msghdr *);
		/* but RTM_DESYNC can't be filtered */
		if (rtm->rtm_type != RTM_DESYNC) {
			/* a zero message filter accepts every type */
			if (rop->rop_msgfilter != 0 &&
			    !(rop->rop_msgfilter & (1U << rtm->rtm_type)))
				goto next;
			if (ISSET(rop->rop_flagfilter, rtm->rtm_flags))
				goto next;
		}
		switch (rtm->rtm_type) {
		case RTM_IFANNOUNCE:
		case RTM_DESYNC:
			/* no tableid */
			break;
		case RTM_RESOLVE:
		case RTM_NEWADDR:
		case RTM_DELADDR:
		case RTM_IFINFO:
		case RTM_80211INFO:
		case RTM_BFD:
			/* check against rdomain id */
			if (rop->rop_rtableid != RTABLE_ANY &&
			    rtable_l2(rop->rop_rtableid) != rtm->rtm_tableid)
				goto next;
			break;
		default:
			/* a zero priority filter accepts every priority */
			if (rop->rop_priority != 0 &&
			    rop->rop_priority < rtm->rtm_priority)
				goto next;
			/* check against rtable id */
			if (rop->rop_rtableid != RTABLE_ANY &&
			    rop->rop_rtableid != rtm->rtm_tableid)
				goto next;
			break;
		}

		/*
		 * Check to see if the flush flag is set. If so, don't queue
		 * any more messages until the flag is cleared.
		 */
		if ((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0)
			goto next;

		/* the return value is ignored; failures flag the socket */
		rtm_sendup(so, m);
next:
		sounlock(so);
	}
	SRPL_LEAVE(&sr);

	m_freem(m);
}
582
583 int
584 rtm_sendup(struct socket *so, struct mbuf *m0)
585 {
586 struct rtpcb *rop = sotortpcb(so);
587 struct mbuf *m;
588
589 soassertlocked(so);
590
591 m = m_copym(m0, 0, M_COPYALL, M_NOWAIT);
592 if (m == NULL)
593 return (ENOMEM);
594
595 if (sbspace(so, &so->so_rcv) < (2 * MSIZE) ||
596 sbappendaddr(so, &so->so_rcv, &route_src, m, NULL) == 0) {
597 /* Flag socket as desync'ed and flush required */
598 rop->rop_flags |= ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH;
599 rtm_senddesync(so);
600 m_freem(m);
601 return (ENOBUFS);
602 }
603
604 sorwakeup(so);
605 return (0);
606 }
607
608 struct rt_msghdr *
609 rtm_report(struct rtentry *rt, u_char type, int seq, int tableid)
610 {
611 struct rt_msghdr *rtm;
612 struct rt_addrinfo info;
613 struct sockaddr_rtlabel sa_rl;
614 struct sockaddr_in6 sa_mask;
615 #ifdef BFD
616 struct sockaddr_bfd sa_bfd;
617 #endif
618 struct ifnet *ifp = NULL;
619 int len;
620
621 bzero(&info, sizeof(info));
622 info.rti_info[RTAX_DST] = rt_key(rt);
623 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
624 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
625 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
626 #ifdef BFD
627 if (rt->rt_flags & RTF_BFD) {
628 KERNEL_LOCK();
629 info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd);
630 KERNEL_UNLOCK();
631 }
632 #endif
633 #ifdef MPLS
634 if (rt->rt_flags & RTF_MPLS) {
635 struct sockaddr_mpls sa_mpls;
636
637 bzero(&sa_mpls, sizeof(sa_mpls));
638 sa_mpls.smpls_family = AF_MPLS;
639 sa_mpls.smpls_len = sizeof(sa_mpls);
640 sa_mpls.smpls_label = ((struct rt_mpls *)
641 rt->rt_llinfo)->mpls_label;
642 info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls;
643 info.rti_mpls = ((struct rt_mpls *)
644 rt->rt_llinfo)->mpls_operation;
645 }
646 #endif
647 ifp = if_get(rt->rt_ifidx);
648 if (ifp != NULL) {
649 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
650 info.rti_info[RTAX_IFA] = rtable_getsource(tableid,
651 info.rti_info[RTAX_DST]->sa_family);
652 if (info.rti_info[RTAX_IFA] == NULL)
653 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
654 if (ifp->if_flags & IFF_POINTOPOINT)
655 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
656 }
657 if_put(ifp);
658 /* RTAX_GENMASK, RTAX_AUTHOR, RTAX_SRCMASK ignored */
659
660 /* build new route message */
661 len = rtm_msg2(type, RTM_VERSION, &info, NULL, NULL);
662 rtm = malloc(len, M_RTABLE, M_WAITOK | M_ZERO);
663
664 rtm_msg2(type, RTM_VERSION, &info, (caddr_t)rtm, NULL);
665 rtm->rtm_type = type;
666 rtm->rtm_index = rt->rt_ifidx;
667 rtm->rtm_tableid = tableid;
668 rtm->rtm_priority = rt->rt_priority & RTP_MASK;
669 rtm->rtm_flags = rt->rt_flags;
670 rtm->rtm_pid = curproc->p_p->ps_pid;
671 rtm->rtm_seq = seq;
672 rtm_getmetrics(rt, &rtm->rtm_rmx);
673 rtm->rtm_addrs = info.rti_addrs;
674 #ifdef MPLS
675 rtm->rtm_mpls = info.rti_mpls;
676 #endif
677 return rtm;
678 }
679
/*
 * Process one routing message written to routing socket `so'.
 *
 * The message in `m' is validated, copied into a flat buffer and
 * dispatched on rtm_type: RTM_PROPOSAL is validated (and forwarded to
 * all interfaces for a solicitation), RTM_SOURCE goes to rt_setsource()
 * and the remaining types go to rtm_output(). The resulting message,
 * with rtm_errno or RTF_DONE set, is copied back into the mbuf and
 * passed to route_input() for delivery to the listening sockets.
 *
 * Called with the socket lock held; the lock is released for the
 * duration of the request and held again on return. route_send()
 * treats `m' as consumed once it has called this function.
 * Returns 0 or an errno value.
 */
int
route_output(struct mbuf *m, struct socket *so)
{
	struct rt_msghdr *rtm = NULL;
	struct rtentry *rt = NULL;
	struct rt_addrinfo info;
	struct ifnet *ifp;
	int len, seq, useloopback, error = 0;
	u_int tableid;
	u_int8_t prio;
	u_char vers, type;

	if (m == NULL || ((m->m_len < sizeof(int32_t)) &&
	    (m = m_pullup(m, sizeof(int32_t))) == 0))
		return (ENOBUFS);
	if ((m->m_flags & M_PKTHDR) == 0)
		panic("route_output");

	/* sample the option while the socket lock is still held */
	useloopback = so->so_options & SO_USELOOPBACK;

	/*
	 * The socket can't be closed concurrently because the file
	 * descriptor reference is still held.
	 */

	sounlock(so);

	/* the packet length must match the length claimed in the header */
	len = m->m_pkthdr.len;
	if (len < offsetof(struct rt_msghdr, rtm_hdrlen) + 1 ||
	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
		error = EINVAL;
		goto fail;
	}
	vers = mtod(m, struct rt_msghdr *)->rtm_version;
	switch (vers) {
	case RTM_VERSION:
		if (len < sizeof(struct rt_msghdr)) {
			error = EINVAL;
			goto fail;
		}
		if (len > RTM_MAXSIZE) {
			error = EMSGSIZE;
			goto fail;
		}
		/* work on a contiguous copy of the message */
		rtm = malloc(len, M_RTABLE, M_WAITOK);
		m_copydata(m, 0, len, rtm);
		break;
	default:
		error = EPROTONOSUPPORT;
		goto fail;
	}

	/* Verify that the caller is sending an appropriate message early */
	switch (rtm->rtm_type) {
	case RTM_ADD:
	case RTM_DELETE:
	case RTM_GET:
	case RTM_CHANGE:
	case RTM_PROPOSAL:
	case RTM_SOURCE:
		break;
	default:
		error = EOPNOTSUPP;
		goto fail;
	}
	/*
	 * Verify that the header length is valid.
	 * All messages from userland start with a struct rt_msghdr.
	 */
	if (rtm->rtm_hdrlen == 0)	/* old client */
		rtm->rtm_hdrlen = sizeof(struct rt_msghdr);
	if (rtm->rtm_hdrlen < sizeof(struct rt_msghdr) ||
	    len < rtm->rtm_hdrlen) {
		error = EINVAL;
		goto fail;
	}

	rtm->rtm_pid = curproc->p_p->ps_pid;

	/*
	 * Verify that the caller has the appropriate privilege; RTM_GET
	 * is the only operation the non-superuser is allowed.
	 */
	if (rtm->rtm_type != RTM_GET && suser(curproc) != 0) {
		error = EACCES;
		goto fail;
	}
	/* a missing routing table is created on demand, but only by RTM_ADD */
	tableid = rtm->rtm_tableid;
	if (!rtable_exists(tableid)) {
		if (rtm->rtm_type == RTM_ADD) {
			if ((error = rtable_add(tableid)) != 0)
				goto fail;
		} else {
			error = EINVAL;
			goto fail;
		}
	}

	/* Do not let userland play with kernel-only flags. */
	if ((rtm->rtm_flags & (RTF_LOCAL|RTF_BROADCAST)) != 0) {
		error = EINVAL;
		goto fail;
	}

	/* make sure that kernel-only bits are not set */
	rtm->rtm_priority &= RTP_MASK;
	rtm->rtm_flags &= ~(RTF_DONE|RTF_CLONED|RTF_CACHED);
	rtm->rtm_fmask &= RTF_FMASK;

	/*
	 * Pick the priority used for the request: an explicit one must
	 * be in range and not RTP_LOCAL; without one, lookups match any
	 * priority and additions get a default depending on RTF_STATIC.
	 */
	if (rtm->rtm_priority != 0) {
		if (rtm->rtm_priority > RTP_MAX ||
		    rtm->rtm_priority == RTP_LOCAL) {
			error = EINVAL;
			goto fail;
		}
		prio = rtm->rtm_priority;
	} else if (rtm->rtm_type != RTM_ADD)
		prio = RTP_ANY;
	else if (rtm->rtm_flags & RTF_STATIC)
		prio = 0;
	else
		prio = RTP_DEFAULT;

	/* locate the socket addresses that follow the header */
	bzero(&info, sizeof(info));
	info.rti_addrs = rtm->rtm_addrs;
	if ((error = rtm_xaddrs(rtm->rtm_hdrlen + (caddr_t)rtm,
	    len + (caddr_t)rtm, &info)) != 0)
		goto fail;

	info.rti_flags = rtm->rtm_flags;

	/*
	 * Except for RTM_SOURCE and RTM_PROPOSAL a destination with a
	 * known address family is mandatory, a gateway (if given) must
	 * have a known family too, and RTAX_GENMASK is not accepted.
	 */
	if (rtm->rtm_type != RTM_SOURCE &&
	    rtm->rtm_type != RTM_PROPOSAL &&
	    (info.rti_info[RTAX_DST] == NULL ||
	    info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
	    (info.rti_info[RTAX_GATEWAY] != NULL &&
	    info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX) ||
	    info.rti_info[RTAX_GENMASK] != NULL)) {
		error = EINVAL;
		goto fail;
	}
#ifdef MPLS
	info.rti_mpls = rtm->rtm_mpls;
#endif

	/* a link-level gateway on a non-cloning route implies RTF_LLINFO */
	if (info.rti_info[RTAX_GATEWAY] != NULL &&
	    info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK &&
	    (info.rti_flags & RTF_CLONING) == 0) {
		info.rti_flags |= RTF_LLINFO;
	}

	/*
	 * Validate RTM_PROPOSAL and pass it along or error out.
	 */
	if (rtm->rtm_type == RTM_PROPOSAL) {
		if (rtm_validate_proposal(&info) == -1) {
			error = EINVAL;
			goto fail;
		}
		/*
		 * If this is a solicitation proposal forward request to
		 * all interfaces. Most handlers will ignore it but at least
		 * umb(4) will send a response to this event.
		 */
		if (rtm->rtm_priority == RTP_PROPOSAL_SOLICIT) {
			NET_LOCK();
			TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
				ifp->if_rtrequest(ifp, RTM_PROPOSAL, NULL);
			}
			NET_UNLOCK();
		}
	} else if (rtm->rtm_type == RTM_SOURCE) {
		if (info.rti_info[RTAX_IFA] == NULL) {
			error = EINVAL;
			goto fail;
		}
		if ((error =
		    rt_setsource(tableid, info.rti_info[RTAX_IFA])) != 0)
			goto fail;
	} else {
		error = rtm_output(rtm, &rt, &info, prio, tableid);
		if (!error) {
			/*
			 * On success the request buffer is replaced by a
			 * fresh report of the resulting route.
			 */
			type = rtm->rtm_type;
			seq = rtm->rtm_seq;
			free(rtm, M_RTABLE, len);
			rtm = rtm_report(rt, type, seq, tableid);
			len = rtm->rtm_msglen;
		}
	}

	rtfree(rt);
	if (error) {
		rtm->rtm_errno = error;
	} else {
		rtm->rtm_flags |= RTF_DONE;
	}

	/*
	 * Check to see if we don't want our own messages.
	 */
	if (!useloopback) {
		if (rtptable.rtp_count == 0) {
			/* no other listener and no loopback of messages */
			goto fail;
		}
	}
	/* put the reply into the mbuf and trim it if the reply is shorter */
	if (m_copyback(m, 0, len, rtm, M_NOWAIT)) {
		m_freem(m);
		m = NULL;
	} else if (m->m_pkthdr.len > len)
		m_adj(m, len - m->m_pkthdr.len);
	free(rtm, M_RTABLE, len);
	if (m)
		route_input(m, so, info.rti_info[RTAX_DST] ?
		    info.rti_info[RTAX_DST]->sa_family : AF_UNSPEC);
	solock(so);

	return (error);
fail:
	/* rtm may still be NULL here */
	free(rtm, M_RTABLE, len);
	m_freem(m);
	solock(so);

	return (error);
}
905
/*
 * Execute the routing table request described by `rtm' and `info'.
 *
 * Handles RTM_ADD, RTM_DELETE, RTM_CHANGE and RTM_GET on table
 * `tableid' using priority `prio'. RTM_ADD on a destination whose
 * current best match is an RTF_CACHED entry does not insert a new
 * route; it jumps into the RTM_CHANGE path (label `change') and
 * updates that entry instead.
 *
 * On return `*prt' holds the route entry the request ended up with, or
 * NULL; route_output() passes it to rtm_report() on success and
 * releases it with rtfree().
 * Returns 0 or an errno value.
 */
int
rtm_output(struct rt_msghdr *rtm, struct rtentry **prt,
    struct rt_addrinfo *info, uint8_t prio, unsigned int tableid)
{
	struct rtentry *rt = *prt;
	struct ifnet *ifp = NULL;
	int plen, newgate = 0, error = 0;

	switch (rtm->rtm_type) {
	case RTM_ADD:
		if (info->rti_info[RTAX_GATEWAY] == NULL) {
			error = EINVAL;
			break;
		}

		rt = rtable_match(tableid, info->rti_info[RTAX_DST], NULL);
		if ((error = route_arp_conflict(rt, info))) {
			rtfree(rt);
			rt = NULL;
			break;
		}

		/*
		 * We cannot go through a delete/create/insert cycle for
		 * cached route because this can lead to races in the
		 * receive path. Instead we update the L2 cache.
		 */
		if ((rt != NULL) && ISSET(rt->rt_flags, RTF_CACHED)) {
			ifp = if_get(rt->rt_ifidx);
			if (ifp == NULL) {
				rtfree(rt);
				rt = NULL;
				error = ESRCH;
				break;
			}

			goto change;
		}

		/* the matched entry was only needed for the checks above */
		rtfree(rt);
		rt = NULL;

		NET_LOCK();
		if ((error = rtm_getifa(info, tableid)) != 0) {
			NET_UNLOCK();
			break;
		}
		error = rtrequest(RTM_ADD, info, prio, &rt, tableid);
		NET_UNLOCK();
		if (error == 0)
			rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
			    &rt->rt_rmx);
		break;
	case RTM_DELETE:
		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
		    prio);
		if (rt == NULL) {
			error = ESRCH;
			break;
		}

		/*
		 * If we got multipath routes, we require users to specify
		 * a matching gateway.
		 */
		if (ISSET(rt->rt_flags, RTF_MPATH) &&
		    info->rti_info[RTAX_GATEWAY] == NULL) {
			error = ESRCH;
			break;
		}

		ifp = if_get(rt->rt_ifidx);
		if (ifp == NULL) {
			rtfree(rt);
			rt = NULL;
			error = ESRCH;
			break;
		}

		/*
		 * Invalidate the cache of automagically created and
		 * referenced L2 entries to make sure that ``rt_gwroute''
		 * pointer stays valid for other CPUs.
		 */
		if ((ISSET(rt->rt_flags, RTF_CACHED))) {
			NET_LOCK();
			ifp->if_rtrequest(ifp, RTM_INVALIDATE, rt);
			/* Reset the MTU of the gateway route. */
			rtable_walk(tableid, rt_key(rt)->sa_family, NULL,
			    route_cleargateway, rt);
			NET_UNLOCK();
			/* cached entries are invalidated, not removed */
			break;
		}

		/*
		 * Make sure that local routes are only modified by the
		 * kernel.
		 */
		if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
			error = EINVAL;
			break;
		}

		/* drop the lookup reference; rtrequest_delete() refills rt */
		rtfree(rt);
		rt = NULL;

		NET_LOCK();
		error = rtrequest_delete(info, prio, ifp, &rt, tableid);
		NET_UNLOCK();
		break;
	case RTM_CHANGE:
		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
		    prio);
		/*
		 * If we got multipath routes, we require users to specify
		 * a matching gateway.
		 */
		if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH) &&
		    (info->rti_info[RTAX_GATEWAY] == NULL)) {
			rtfree(rt);
			rt = NULL;
		}

		/*
		 * If RTAX_GATEWAY is the argument we're trying to
		 * change, try to find a compatible route.
		 */
		if ((rt == NULL) && (info->rti_info[RTAX_GATEWAY] != NULL)) {
			rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
			    info->rti_info[RTAX_NETMASK], NULL, prio);
			/* Ensure we don't pick a multipath one. */
			if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH)) {
				rtfree(rt);
				rt = NULL;
			}
		}

		if (rt == NULL) {
			error = ESRCH;
			break;
		}

		/*
		 * Make sure that local routes are only modified by the
		 * kernel.
		 */
		if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
			error = EINVAL;
			break;
		}

		ifp = if_get(rt->rt_ifidx);
		if (ifp == NULL) {
			rtfree(rt);
			rt = NULL;
			error = ESRCH;
			break;
		}

		/*
		 * RTM_CHANGE needs a perfect match.
		 */
		plen = rtable_satoplen(info->rti_info[RTAX_DST]->sa_family,
		    info->rti_info[RTAX_NETMASK]);
		if (rt_plen(rt) != plen) {
			error = ESRCH;
			break;
		}

		/* a gateway that differs from the current one is a new gate */
		if (info->rti_info[RTAX_GATEWAY] != NULL)
			if (rt->rt_gateway == NULL ||
			    bcmp(rt->rt_gateway,
			    info->rti_info[RTAX_GATEWAY],
			    info->rti_info[RTAX_GATEWAY]->sa_len)) {
				newgate = 1;
			}
		/*
		 * Check reachable gateway before changing the route.
		 * New gateway could require new ifaddr, ifp;
		 * flags may also be different; ifp may be specified
		 * by ll sockaddr when protocol address is ambiguous.
		 */
		if (newgate || info->rti_info[RTAX_IFP] != NULL ||
		    info->rti_info[RTAX_IFA] != NULL) {
			struct ifaddr *ifa = NULL;

			NET_LOCK();
			if ((error = rtm_getifa(info, tableid)) != 0) {
				NET_UNLOCK();
				break;
			}
			ifa = info->rti_ifa;
			if (rt->rt_ifa != ifa) {
				ifp->if_rtrequest(ifp, RTM_DELETE, rt);
				ifafree(rt->rt_ifa);

				rt->rt_ifa = ifaref(ifa);
				rt->rt_ifidx = ifa->ifa_ifp->if_index;
				/* recheck link state after ifp change */
				rt_if_linkstate_change(rt, ifa->ifa_ifp,
				    tableid);
				/*
				 * NOTE(review): `ifp' still refers to the
				 * interface looked up before rt_ifidx was
				 * rewritten, and the RTM_ADD request further
				 * down is issued on that ifp -- confirm this
				 * is intended.
				 */
			}
			NET_UNLOCK();
		}
change:
		/* also entered from RTM_ADD for cached entries (newgate == 0) */
		if (info->rti_info[RTAX_GATEWAY] != NULL) {
			/* When updating the gateway, make sure it is valid. */
			if (!newgate && rt->rt_gateway->sa_family !=
			    info->rti_info[RTAX_GATEWAY]->sa_family) {
				error = EINVAL;
				break;
			}

			NET_LOCK();
			error = rt_setgate(rt,
			    info->rti_info[RTAX_GATEWAY], tableid);
			NET_UNLOCK();
			if (error)
				break;
		}
#ifdef MPLS
		if (rtm->rtm_flags & RTF_MPLS) {
			NET_LOCK();
			error = rt_mpls_set(rt,
			    info->rti_info[RTAX_SRC], info->rti_mpls);
			NET_UNLOCK();
			if (error)
				break;
		} else if (newgate || (rtm->rtm_fmask & RTF_MPLS)) {
			NET_LOCK();
			/* if gateway changed remove MPLS information */
			rt_mpls_clear(rt);
			NET_UNLOCK();
		}
#endif

#ifdef BFD
		if (ISSET(rtm->rtm_flags, RTF_BFD)) {
			KERNEL_LOCK();
			error = bfdset(rt);
			KERNEL_UNLOCK();
			if (error)
				break;
		} else if (!ISSET(rtm->rtm_flags, RTF_BFD) &&
		    ISSET(rtm->rtm_fmask, RTF_BFD)) {
			KERNEL_LOCK();
			bfdclear(rt);
			KERNEL_UNLOCK();
		}
#endif

		NET_LOCK();
		/* Hack to allow some flags to be toggled */
		if (rtm->rtm_fmask) {
			/* MPLS flag it is set by rt_mpls_set() */
			rtm->rtm_fmask &= ~RTF_MPLS;
			rtm->rtm_flags &= ~RTF_MPLS;
			/* only the bits selected by rtm_fmask are replaced */
			rt->rt_flags =
			    (rt->rt_flags & ~rtm->rtm_fmask) |
			    (rtm->rtm_flags & rtm->rtm_fmask);
		}
		rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &rt->rt_rmx);

		ifp->if_rtrequest(ifp, RTM_ADD, rt);

		/* swap the route label if the request carries one */
		if (info->rti_info[RTAX_LABEL] != NULL) {
			char *rtlabel = ((struct sockaddr_rtlabel *)
			    info->rti_info[RTAX_LABEL])->sr_label;
			rtlabel_unref(rt->rt_labelid);
			rt->rt_labelid = rtlabel_name2id(rtlabel);
		}
		if_group_routechange(info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK]);
		/* metric locks: clear those being set, then apply rmx_locks */
		rt->rt_locks &= ~(rtm->rtm_inits);
		rt->rt_locks |= (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
		NET_UNLOCK();
		break;
	case RTM_GET:
		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
		    prio);
		if (rt == NULL)
			error = ESRCH;
		break;
	}

	/* ifp may be NULL here; every path funnels through this release */
	if_put(ifp);
	*prt = rt;
	return (error);
}
1198
1199 struct ifaddr *
1200 ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway,
1201 unsigned int rtableid)
1202 {
1203 struct ifaddr *ifa;
1204
1205 if ((flags & RTF_GATEWAY) == 0) {
1206 /*
1207 * If we are adding a route to an interface,
1208 * and the interface is a pt to pt link
1209 * we should search for the destination
1210 * as our clue to the interface. Otherwise
1211 * we can use the local address.
1212 */
1213 ifa = NULL;
1214 if (flags & RTF_HOST)
1215 ifa = ifa_ifwithdstaddr(dst, rtableid);
1216 if (ifa == NULL)
1217 ifa = ifa_ifwithaddr(gateway, rtableid);
1218 } else {
1219 /*
1220 * If we are adding a route to a remote net
1221 * or host, the gateway may still be on the
1222 * other end of a pt to pt link.
1223 */
1224 ifa = ifa_ifwithdstaddr(gateway, rtableid);
1225 }
1226 if (ifa == NULL) {
1227 if (gateway->sa_family == AF_LINK) {
1228 struct sockaddr_dl *sdl = satosdl(gateway);
1229 struct ifnet *ifp = if_get(sdl->sdl_index);
1230
1231 if (ifp != NULL)
1232 ifa = ifaof_ifpforaddr(dst, ifp);
1233 if_put(ifp);
1234 } else {
1235 struct rtentry *rt;
1236
1237 rt = rtalloc(gateway, RT_RESOLVE, rtable_l2(rtableid));
1238 if (rt != NULL)
1239 ifa = rt->rt_ifa;
1240 rtfree(rt);
1241 }
1242 }
1243 if (ifa == NULL)
1244 return (NULL);
1245 if (ifa->ifa_addr->sa_family != dst->sa_family) {
1246 struct ifaddr *oifa = ifa;
1247 ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
1248 if (ifa == NULL)
1249 ifa = oifa;
1250 }
1251 return (ifa);
1252 }
1253
1254 int
1255 rtm_getifa(struct rt_addrinfo *info, unsigned int rtid)
1256 {
1257 struct ifnet *ifp = NULL;
1258
1259 /*
1260 * The "returned" `ifa' is guaranteed to be alive only if
1261 * the NET_LOCK() is held.
1262 */
1263 NET_ASSERT_LOCKED();
1264
1265 /*
1266 * ifp may be specified by sockaddr_dl when protocol address
1267 * is ambiguous
1268 */
1269 if (info->rti_info[RTAX_IFP] != NULL) {
1270 struct sockaddr_dl *sdl;
1271
1272 sdl = satosdl(info->rti_info[RTAX_IFP]);
1273 ifp = if_get(sdl->sdl_index);
1274 }
1275
1276 #ifdef IPSEC
1277 /*
1278 * If the destination is a PF_KEY address, we'll look
1279 * for the existence of a encap interface number or address
1280 * in the options list of the gateway. By default, we'll return
1281 * enc0.
1282 */
1283 if (info->rti_info[RTAX_DST] &&
1284 info->rti_info[RTAX_DST]->sa_family == PF_KEY)
1285 info->rti_ifa = enc_getifa(rtid, 0);
1286 #endif
1287
1288 if (info->rti_ifa == NULL && info->rti_info[RTAX_IFA] != NULL)
1289 info->rti_ifa = ifa_ifwithaddr(info->rti_info[RTAX_IFA], rtid);
1290
1291 if (info->rti_ifa == NULL) {
1292 struct sockaddr *sa;
1293
1294 if ((sa = info->rti_info[RTAX_IFA]) == NULL)
1295 if ((sa = info->rti_info[RTAX_GATEWAY]) == NULL)
1296 sa = info->rti_info[RTAX_DST];
1297
1298 if (sa != NULL && ifp != NULL)
1299 info->rti_ifa = ifaof_ifpforaddr(sa, ifp);
1300 else if (info->rti_info[RTAX_DST] != NULL &&
1301 info->rti_info[RTAX_GATEWAY] != NULL)
1302 info->rti_ifa = ifa_ifwithroute(info->rti_flags,
1303 info->rti_info[RTAX_DST],
1304 info->rti_info[RTAX_GATEWAY],
1305 rtid);
1306 else if (sa != NULL)
1307 info->rti_ifa = ifa_ifwithroute(info->rti_flags,
1308 sa, sa, rtid);
1309 }
1310
1311 if_put(ifp);
1312
1313 if (info->rti_ifa == NULL)
1314 return (ENETUNREACH);
1315
1316 return (0);
1317 }
1318
1319 int
1320 route_cleargateway(struct rtentry *rt, void *arg, unsigned int rtableid)
1321 {
1322 struct rtentry *nhrt = arg;
1323
1324 if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt &&
1325 !ISSET(rt->rt_locks, RTV_MTU))
1326 rt->rt_mtu = 0;
1327
1328 return (0);
1329 }
1330
1331 /*
1332 * Check if the user request to insert an ARP entry does not conflict
1333 * with existing ones.
1334 *
1335 * Only two entries are allowed for a given IP address: a private one
1336 * (priv) and a public one (pub).
1337 */
1338 int
1339 route_arp_conflict(struct rtentry *rt, struct rt_addrinfo *info)
1340 {
1341 int proxy = (info->rti_flags & RTF_ANNOUNCE);
1342
1343 if ((info->rti_flags & RTF_LLINFO) == 0 ||
1344 (info->rti_info[RTAX_DST]->sa_family != AF_INET))
1345 return (0);
1346
1347 if (rt == NULL || !ISSET(rt->rt_flags, RTF_LLINFO))
1348 return (0);
1349
1350 /* If the entry is cached, it can be updated. */
1351 if (ISSET(rt->rt_flags, RTF_CACHED))
1352 return (0);
1353
1354 /*
1355 * Same destination, not cached and both "priv" or "pub" conflict.
1356 * If a second entry exists, it always conflict.
1357 */
1358 if ((ISSET(rt->rt_flags, RTF_ANNOUNCE) == proxy) ||
1359 ISSET(rt->rt_flags, RTF_MPATH))
1360 return (EEXIST);
1361
1362 /* No conflict but an entry exist so we need to force mpath. */
1363 info->rti_flags |= RTF_MPATH;
1364 return (0);
1365 }
1366
1367 void
1368 rtm_setmetrics(u_long which, const struct rt_metrics *in,
1369 struct rt_kmetrics *out)
1370 {
1371 int64_t expire;
1372
1373 if (which & RTV_MTU)
1374 out->rmx_mtu = in->rmx_mtu;
1375 if (which & RTV_EXPIRE) {
1376 expire = in->rmx_expire;
1377 if (expire != 0) {
1378 expire -= gettime();
1379 expire += getuptime();
1380 }
1381
1382 out->rmx_expire = expire;
1383 }
1384 }
1385
1386 void
1387 rtm_getmetrics(const struct rtentry *rt, struct rt_metrics *out)
1388 {
1389 const struct rt_kmetrics *in = &rt->rt_rmx;
1390 int64_t expire;
1391
1392 expire = in->rmx_expire;
1393 if (expire == 0)
1394 expire = rt_timer_get_expire(rt);
1395 if (expire != 0) {
1396 expire -= getuptime();
1397 expire += gettime();
1398 }
1399
1400 bzero(out, sizeof(*out));
1401 out->rmx_locks = in->rmx_locks;
1402 out->rmx_mtu = in->rmx_mtu;
1403 out->rmx_expire = expire;
1404 out->rmx_pksent = in->rmx_pksent;
1405 }
1406
/*
 * ROUNDUP(a): round the length `a' up to the next multiple of
 * sizeof(long); a length that is not greater than zero yields
 * sizeof(long).
 * ADVANCE(x, n): step the pointer `x' past the sockaddr `n', using the
 * rounded-up sa_len.  Both macro arguments are parenthesized so that
 * any lvalue expression can be passed for `x'.
 */
#define ROUNDUP(a) \
	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
#define ADVANCE(x, n) ((x) += ROUNDUP((n)->sa_len))
1410
1411 int
1412 rtm_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
1413 {
1414 struct sockaddr *sa;
1415 int i;
1416
1417 /*
1418 * Parse address bits, split address storage in chunks, and
1419 * set info pointers. Use sa_len for traversing the memory
1420 * and check that we stay within in the limit.
1421 */
1422 bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info));
1423 for (i = 0; i < sizeof(rtinfo->rti_addrs) * 8; i++) {
1424 if ((rtinfo->rti_addrs & (1U << i)) == 0)
1425 continue;
1426 if (i >= RTAX_MAX || cp + sizeof(socklen_t) > cplim)
1427 return (EINVAL);
1428 sa = (struct sockaddr *)cp;
1429 if (cp + sa->sa_len > cplim)
1430 return (EINVAL);
1431 rtinfo->rti_info[i] = sa;
1432 ADVANCE(cp, sa);
1433 }
1434 /*
1435 * Check that the address family is suitable for the route address
1436 * type. Check that each address has a size that fits its family
1437 * and its length is within the size. Strings within addresses must
1438 * be NUL terminated.
1439 */
1440 for (i = 0; i < RTAX_MAX; i++) {
1441 size_t len, maxlen, size;
1442
1443 sa = rtinfo->rti_info[i];
1444 if (sa == NULL)
1445 continue;
1446 maxlen = size = 0;
1447 switch (i) {
1448 case RTAX_DST:
1449 case RTAX_GATEWAY:
1450 case RTAX_SRC:
1451 switch (sa->sa_family) {
1452 case AF_INET:
1453 size = sizeof(struct sockaddr_in);
1454 break;
1455 case AF_LINK:
1456 size = sizeof(struct sockaddr_dl);
1457 break;
1458 #ifdef INET6
1459 case AF_INET6:
1460 size = sizeof(struct sockaddr_in6);
1461 break;
1462 #endif
1463 #ifdef MPLS
1464 case AF_MPLS:
1465 size = sizeof(struct sockaddr_mpls);
1466 break;
1467 #endif
1468 }
1469 break;
1470 case RTAX_IFP:
1471 if (sa->sa_family != AF_LINK)
1472 return (EAFNOSUPPORT);
1473 /*
1474 * XXX Should be sizeof(struct sockaddr_dl), but
1475 * route(8) has a bug and provides less memory.
1476 * arp(8) has another bug and uses sizeof pointer.
1477 */
1478 size = 4;
1479 break;
1480 case RTAX_IFA:
1481 switch (sa->sa_family) {
1482 case AF_INET:
1483 size = sizeof(struct sockaddr_in);
1484 break;
1485 #ifdef INET6
1486 case AF_INET6:
1487 size = sizeof(struct sockaddr_in6);
1488 break;
1489 #endif
1490 default:
1491 return (EAFNOSUPPORT);
1492 }
1493 break;
1494 case RTAX_LABEL:
1495 sa->sa_family = AF_UNSPEC;
1496 maxlen = RTLABEL_LEN;
1497 size = sizeof(struct sockaddr_rtlabel);
1498 break;
1499 #ifdef BFD
1500 case RTAX_BFD:
1501 sa->sa_family = AF_UNSPEC;
1502 size = sizeof(struct sockaddr_bfd);
1503 break;
1504 #endif
1505 case RTAX_DNS:
1506 /* more validation in rtm_validate_proposal */
1507 if (sa->sa_len > sizeof(struct sockaddr_rtdns))
1508 return (EINVAL);
1509 if (sa->sa_len < offsetof(struct sockaddr_rtdns,
1510 sr_dns))
1511 return (EINVAL);
1512 switch (sa->sa_family) {
1513 case AF_INET:
1514 #ifdef INET6
1515 case AF_INET6:
1516 #endif
1517 break;
1518 default:
1519 return (EAFNOSUPPORT);
1520 }
1521 break;
1522 case RTAX_STATIC:
1523 sa->sa_family = AF_UNSPEC;
1524 maxlen = RTSTATIC_LEN;
1525 size = sizeof(struct sockaddr_rtstatic);
1526 break;
1527 case RTAX_SEARCH:
1528 sa->sa_family = AF_UNSPEC;
1529 maxlen = RTSEARCH_LEN;
1530 size = sizeof(struct sockaddr_rtsearch);
1531 break;
1532 }
1533 if (size) {
1534 /* memory for the full struct must be provided */
1535 if (sa->sa_len < size)
1536 return (EINVAL);
1537 }
1538 if (maxlen) {
1539 /* this should not happen */
1540 if (2 + maxlen > size)
1541 return (EINVAL);
1542 /* strings must be NUL terminated within the struct */
1543 len = strnlen(sa->sa_data, maxlen);
1544 if (len >= maxlen || 2 + len >= sa->sa_len)
1545 return (EINVAL);
1546 break;
1547 }
1548 }
1549 return (0);
1550 }
1551
1552 struct mbuf *
1553 rtm_msg1(int type, struct rt_addrinfo *rtinfo)
1554 {
1555 struct rt_msghdr *rtm;
1556 struct mbuf *m;
1557 int i;
1558 struct sockaddr *sa;
1559 int len, dlen, hlen;
1560
1561 switch (type) {
1562 case RTM_DELADDR:
1563 case RTM_NEWADDR:
1564 hlen = sizeof(struct ifa_msghdr);
1565 break;
1566 case RTM_IFINFO:
1567 hlen = sizeof(struct if_msghdr);
1568 break;
1569 case RTM_IFANNOUNCE:
1570 hlen = sizeof(struct if_announcemsghdr);
1571 break;
1572 #ifdef BFD
1573 case RTM_BFD:
1574 hlen = sizeof(struct bfd_msghdr);
1575 break;
1576 #endif
1577 case RTM_80211INFO:
1578 hlen = sizeof(struct if_ieee80211_msghdr);
1579 break;
1580 default:
1581 hlen = sizeof(struct rt_msghdr);
1582 break;
1583 }
1584 len = hlen;
1585 for (i = 0; i < RTAX_MAX; i++) {
1586 if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL)
1587 continue;
1588 len += ROUNDUP(sa->sa_len);
1589 }
1590 if (len > MCLBYTES)
1591 panic("rtm_msg1");
1592 m = m_gethdr(M_DONTWAIT, MT_DATA);
1593 if (m && len > MHLEN) {
1594 MCLGET(m, M_DONTWAIT);
1595 if ((m->m_flags & M_EXT) == 0) {
1596 m_free(m);
1597 m = NULL;
1598 }
1599 }
1600 if (m == NULL)
1601 return (m);
1602 m->m_pkthdr.len = m->m_len = len;
1603 m->m_pkthdr.ph_ifidx = 0;
1604 rtm = mtod(m, struct rt_msghdr *);
1605 bzero(rtm, len);
1606 len = hlen;
1607 for (i = 0; i < RTAX_MAX; i++) {
1608 if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL)
1609 continue;
1610 rtinfo->rti_addrs |= (1U << i);
1611 dlen = ROUNDUP(sa->sa_len);
1612 if (m_copyback(m, len, sa->sa_len, sa, M_NOWAIT)) {
1613 m_freem(m);
1614 return (NULL);
1615 }
1616 len += dlen;
1617 }
1618 rtm->rtm_msglen = len;
1619 rtm->rtm_hdrlen = hlen;
1620 rtm->rtm_version = RTM_VERSION;
1621 rtm->rtm_type = type;
1622 return (m);
1623 }
1624
1625 int
1626 rtm_msg2(int type, int vers, struct rt_addrinfo *rtinfo, caddr_t cp,
1627 struct walkarg *w)
1628 {
1629 int i;
1630 int len, dlen, hlen, second_time = 0;
1631 caddr_t cp0;
1632
1633 rtinfo->rti_addrs = 0;
1634 again:
1635 switch (type) {
1636 case RTM_DELADDR:
1637 case RTM_NEWADDR:
1638 len = sizeof(struct ifa_msghdr);
1639 break;
1640 case RTM_IFINFO:
1641 len = sizeof(struct if_msghdr);
1642 break;
1643 default:
1644 len = sizeof(struct rt_msghdr);
1645 break;
1646 }
1647 hlen = len;
1648 if ((cp0 = cp) != NULL)
1649 cp += len;
1650 for (i = 0; i < RTAX_MAX; i++) {
1651 struct sockaddr *sa;
1652
1653 if ((sa = rtinfo->rti_info[i]) == NULL)
1654 continue;
1655 rtinfo->rti_addrs |= (1U << i);
1656 dlen = ROUNDUP(sa->sa_len);
1657 if (cp) {
1658 bcopy(sa, cp, sa->sa_len);
1659 bzero(cp + sa->sa_len, dlen - sa->sa_len);
1660 cp += dlen;
1661 }
1662 len += dlen;
1663 }
1664 /* align message length to the next natural boundary */
1665 len = ALIGN(len);
1666 if (cp == 0 && w != NULL && !second_time) {
1667 w->w_needed += len;
1668 if (w->w_needed <= w->w_given && w->w_where) {
1669 if (w->w_tmemsize < len) {
1670 free(w->w_tmem, M_RTABLE, w->w_tmemsize);
1671 w->w_tmem = malloc(len, M_RTABLE,
1672 M_NOWAIT | M_ZERO);
1673 if (w->w_tmem)
1674 w->w_tmemsize = len;
1675 }
1676 if (w->w_tmem) {
1677 cp = w->w_tmem;
1678 second_time = 1;
1679 goto again;
1680 } else
1681 w->w_where = 0;
1682 }
1683 }
1684 if (cp && w) /* clear the message header */
1685 bzero(cp0, hlen);
1686
1687 if (cp) {
1688 struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
1689
1690 rtm->rtm_version = RTM_VERSION;
1691 rtm->rtm_type = type;
1692 rtm->rtm_msglen = len;
1693 rtm->rtm_hdrlen = hlen;
1694 }
1695 return (len);
1696 }
1697
1698 void
1699 rtm_send(struct rtentry *rt, int cmd, int error, unsigned int rtableid)
1700 {
1701 struct rt_addrinfo info;
1702 struct ifnet *ifp;
1703 struct sockaddr_rtlabel sa_rl;
1704 struct sockaddr_in6 sa_mask;
1705
1706 memset(&info, 0, sizeof(info));
1707 info.rti_info[RTAX_DST] = rt_key(rt);
1708 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1709 if (!ISSET(rt->rt_flags, RTF_HOST))
1710 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
1711 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
1712 ifp = if_get(rt->rt_ifidx);
1713 if (ifp != NULL) {
1714 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1715 info.rti_info[RTAX_IFA] = rtable_getsource(rtableid,
1716 info.rti_info[RTAX_DST]->sa_family);
1717 if (info.rti_info[RTAX_IFA] == NULL)
1718 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1719 }
1720
1721 rtm_miss(cmd, &info, rt->rt_flags, rt->rt_priority, rt->rt_ifidx, error,
1722 rtableid);
1723 if_put(ifp);
1724 }
1725
1726 /*
1727 * This routine is called to generate a message from the routing
1728 * socket indicating that a redirect has occurred, a routing lookup
1729 * has failed, or that a protocol has detected timeouts to a particular
1730 * destination.
1731 */
1732 void
1733 rtm_miss(int type, struct rt_addrinfo *rtinfo, int flags, uint8_t prio,
1734 u_int ifidx, int error, u_int tableid)
1735 {
1736 struct rt_msghdr *rtm;
1737 struct mbuf *m;
1738 struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
1739
1740 if (rtptable.rtp_count == 0)
1741 return;
1742 m = rtm_msg1(type, rtinfo);
1743 if (m == NULL)
1744 return;
1745 rtm = mtod(m, struct rt_msghdr *);
1746 rtm->rtm_flags = RTF_DONE | flags;
1747 rtm->rtm_priority = prio;
1748 rtm->rtm_errno = error;
1749 rtm->rtm_tableid = tableid;
1750 rtm->rtm_addrs = rtinfo->rti_addrs;
1751 rtm->rtm_index = ifidx;
1752 route_input(m, NULL, sa ? sa->sa_family : AF_UNSPEC);
1753 }
1754
1755 /*
1756 * This routine is called to generate a message from the routing
1757 * socket indicating that the status of a network interface has changed.
1758 */
1759 void
1760 rtm_ifchg(struct ifnet *ifp)
1761 {
1762 struct rt_addrinfo info;
1763 struct if_msghdr *ifm;
1764 struct mbuf *m;
1765
1766 if (rtptable.rtp_count == 0)
1767 return;
1768 memset(&info, 0, sizeof(info));
1769 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1770 m = rtm_msg1(RTM_IFINFO, &info);
1771 if (m == NULL)
1772 return;
1773 ifm = mtod(m, struct if_msghdr *);
1774 ifm->ifm_index = ifp->if_index;
1775 ifm->ifm_tableid = ifp->if_rdomain;
1776 ifm->ifm_flags = ifp->if_flags;
1777 ifm->ifm_xflags = ifp->if_xflags;
1778 if_getdata(ifp, &ifm->ifm_data);
1779 ifm->ifm_addrs = info.rti_addrs;
1780 route_input(m, NULL, AF_UNSPEC);
1781 }
1782
1783 /*
1784 * This is called to generate messages from the routing socket
1785 * indicating a network interface has had addresses associated with it.
1786 * if we ever reverse the logic and replace messages TO the routing
1787 * socket indicate a request to configure interfaces, then it will
1788 * be unnecessary as the routing socket will automatically generate
1789 * copies of it.
1790 */
1791 void
1792 rtm_addr(int cmd, struct ifaddr *ifa)
1793 {
1794 struct ifnet *ifp = ifa->ifa_ifp;
1795 struct mbuf *m;
1796 struct rt_addrinfo info;
1797 struct ifa_msghdr *ifam;
1798
1799 if (rtptable.rtp_count == 0)
1800 return;
1801
1802 memset(&info, 0, sizeof(info));
1803 info.rti_info[RTAX_IFA] = ifa->ifa_addr;
1804 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1805 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1806 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1807 if ((m = rtm_msg1(cmd, &info)) == NULL)
1808 return;
1809 ifam = mtod(m, struct ifa_msghdr *);
1810 ifam->ifam_index = ifp->if_index;
1811 ifam->ifam_metric = ifa->ifa_metric;
1812 ifam->ifam_flags = ifa->ifa_flags;
1813 ifam->ifam_addrs = info.rti_addrs;
1814 ifam->ifam_tableid = ifp->if_rdomain;
1815
1816 route_input(m, NULL,
1817 ifa->ifa_addr ? ifa->ifa_addr->sa_family : AF_UNSPEC);
1818 }
1819
1820 /*
1821 * This is called to generate routing socket messages indicating
1822 * network interface arrival and departure.
1823 */
1824 void
1825 rtm_ifannounce(struct ifnet *ifp, int what)
1826 {
1827 struct if_announcemsghdr *ifan;
1828 struct mbuf *m;
1829
1830 if (rtptable.rtp_count == 0)
1831 return;
1832 m = rtm_msg1(RTM_IFANNOUNCE, NULL);
1833 if (m == NULL)
1834 return;
1835 ifan = mtod(m, struct if_announcemsghdr *);
1836 ifan->ifan_index = ifp->if_index;
1837 strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name));
1838 ifan->ifan_what = what;
1839 route_input(m, NULL, AF_UNSPEC);
1840 }
1841
#ifdef BFD
/*
 * Generate an RTM_BFD routing socket message describing the state of
 * the BFD session `bfd'.  The message carries the destination and the
 * interface address of the session's route.
 */
void
rtm_bfd(struct bfd_config *bfd)
{
	struct rtentry *rt;
	struct rt_addrinfo info;
	struct sockaddr_bfd state;
	struct bfd_msghdr *hdr;
	struct mbuf *m;

	if (rtptable.rtp_count == 0)
		return;

	rt = bfd->bc_rt;
	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;

	if ((m = rtm_msg1(RTM_BFD, &info)) == NULL)
		return;
	hdr = mtod(m, struct bfd_msghdr *);
	hdr->bm_addrs = info.rti_addrs;

	/* The session state is read with the kernel lock held. */
	KERNEL_ASSERT_LOCKED();
	bfd2sa(bfd->bc_rt, &state);
	memcpy(&hdr->bm_sa, &state, sizeof(state));

	route_input(m, NULL, info.rti_info[RTAX_DST]->sa_family);
}
#endif /* BFD */
1874
1875 /*
1876 * This is used to generate routing socket messages indicating
1877 * the state of an ieee80211 interface.
1878 */
1879 void
1880 rtm_80211info(struct ifnet *ifp, struct if_ieee80211_data *ifie)
1881 {
1882 struct if_ieee80211_msghdr *ifim;
1883 struct mbuf *m;
1884
1885 if (rtptable.rtp_count == 0)
1886 return;
1887 m = rtm_msg1(RTM_80211INFO, NULL);
1888 if (m == NULL)
1889 return;
1890 ifim = mtod(m, struct if_ieee80211_msghdr *);
1891 ifim->ifim_index = ifp->if_index;
1892 ifim->ifim_tableid = ifp->if_rdomain;
1893
1894 memcpy(&ifim->ifim_ifie, ifie, sizeof(ifim->ifim_ifie));
1895 route_input(m, NULL, AF_UNSPEC);
1896 }
1897
1898 /*
1899 * This is used to generate routing socket messages indicating
1900 * the address selection proposal from an interface.
1901 */
1902 void
1903 rtm_proposal(struct ifnet *ifp, struct rt_addrinfo *rtinfo, int flags,
1904 uint8_t prio)
1905 {
1906 struct rt_msghdr *rtm;
1907 struct mbuf *m;
1908
1909 m = rtm_msg1(RTM_PROPOSAL, rtinfo);
1910 if (m == NULL)
1911 return;
1912 rtm = mtod(m, struct rt_msghdr *);
1913 rtm->rtm_flags = RTF_DONE | flags;
1914 rtm->rtm_priority = prio;
1915 rtm->rtm_tableid = ifp->if_rdomain;
1916 rtm->rtm_index = ifp->if_index;
1917 rtm->rtm_addrs = rtinfo->rti_addrs;
1918
1919 route_input(m, NULL, rtinfo->rti_info[RTAX_DNS]->sa_family);
1920 }
1921
1922 /*
1923 * This is used in dumping the kernel table via sysctl().
1924 */
1925 int
1926 sysctl_dumpentry(struct rtentry *rt, void *v, unsigned int id)
1927 {
1928 struct walkarg *w = v;
1929 int error = 0, size;
1930 struct rt_addrinfo info;
1931 struct ifnet *ifp;
1932 #ifdef BFD
1933 struct sockaddr_bfd sa_bfd;
1934 #endif
1935 struct sockaddr_rtlabel sa_rl;
1936 struct sockaddr_in6 sa_mask;
1937
1938 if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
1939 return 0;
1940 if (w->w_op == NET_RT_DUMP && w->w_arg) {
1941 u_int8_t prio = w->w_arg & RTP_MASK;
1942 if (w->w_arg < 0) {
1943 prio = (-w->w_arg) & RTP_MASK;
1944 /* Show all routes that are not this priority */
1945 if (prio == (rt->rt_priority & RTP_MASK))
1946 return 0;
1947 } else {
1948 if (prio != (rt->rt_priority & RTP_MASK) &&
1949 prio != RTP_ANY)
1950 return 0;
1951 }
1952 }
1953 bzero(&info, sizeof(info));
1954 info.rti_info[RTAX_DST] = rt_key(rt);
1955 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1956 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
1957 ifp = if_get(rt->rt_ifidx);
1958 if (ifp != NULL) {
1959 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1960 info.rti_info[RTAX_IFA] =
1961 rtable_getsource(id, info.rti_info[RTAX_DST]->sa_family);
1962 if (info.rti_info[RTAX_IFA] == NULL)
1963 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1964 if (ifp->if_flags & IFF_POINTOPOINT)
1965 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
1966 }
1967 if_put(ifp);
1968 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
1969 #ifdef BFD
1970 if (rt->rt_flags & RTF_BFD) {
1971 KERNEL_ASSERT_LOCKED();
1972 info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd);
1973 }
1974 #endif
1975 #ifdef MPLS
1976 if (rt->rt_flags & RTF_MPLS) {
1977 struct sockaddr_mpls sa_mpls;
1978
1979 bzero(&sa_mpls, sizeof(sa_mpls));
1980 sa_mpls.smpls_family = AF_MPLS;
1981 sa_mpls.smpls_len = sizeof(sa_mpls);
1982 sa_mpls.smpls_label = ((struct rt_mpls *)
1983 rt->rt_llinfo)->mpls_label;
1984 info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls;
1985 info.rti_mpls = ((struct rt_mpls *)
1986 rt->rt_llinfo)->mpls_operation;
1987 }
1988 #endif
1989
1990 size = rtm_msg2(RTM_GET, RTM_VERSION, &info, NULL, w);
1991 if (w->w_where && w->w_tmem && w->w_needed <= w->w_given) {
1992 struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
1993
1994 rtm->rtm_pid = curproc->p_p->ps_pid;
1995 rtm->rtm_flags = RTF_DONE | rt->rt_flags;
1996 rtm->rtm_priority = rt->rt_priority & RTP_MASK;
1997 rtm_getmetrics(rt, &rtm->rtm_rmx);
1998 /* Do not account the routing table's reference. */
1999 rtm->rtm_rmx.rmx_refcnt = refcnt_read(&rt->rt_refcnt) - 1;
2000 rtm->rtm_index = rt->rt_ifidx;
2001 rtm->rtm_addrs = info.rti_addrs;
2002 rtm->rtm_tableid = id;
2003 #ifdef MPLS
2004 rtm->rtm_mpls = info.rti_mpls;
2005 #endif
2006 if ((error = copyout(rtm, w->w_where, size)) != 0)
2007 w->w_where = NULL;
2008 else
2009 w->w_where += size;
2010 }
2011 return (error);
2012 }
2013
2014 int
2015 sysctl_iflist(int af, struct walkarg *w)
2016 {
2017 struct ifnet *ifp;
2018 struct ifaddr *ifa;
2019 struct rt_addrinfo info;
2020 int len, error = 0;
2021
2022 bzero(&info, sizeof(info));
2023 TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
2024 if (w->w_arg && w->w_arg != ifp->if_index)
2025 continue;
2026 /* Copy the link-layer address first */
2027 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
2028 len = rtm_msg2(RTM_IFINFO, RTM_VERSION, &info, 0, w);
2029 if (w->w_where && w->w_tmem && w->w_needed <= w->w_given) {
2030 struct if_msghdr *ifm;
2031
2032 ifm = (struct if_msghdr *)w->w_tmem;
2033 ifm->ifm_index = ifp->if_index;
2034 ifm->ifm_tableid = ifp->if_rdomain;
2035 ifm->ifm_flags = ifp->if_flags;
2036 if_getdata(ifp, &ifm->ifm_data);
2037 ifm->ifm_addrs = info.rti_addrs;
2038 error = copyout(ifm, w->w_where, len);
2039 if (error)
2040 return (error);
2041 w->w_where += len;
2042 }
2043 info.rti_info[RTAX_IFP] = NULL;
2044 TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
2045 KASSERT(ifa->ifa_addr->sa_family != AF_LINK);
2046 if (af && af != ifa->ifa_addr->sa_family)
2047 continue;
2048 info.rti_info[RTAX_IFA] = ifa->ifa_addr;
2049 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
2050 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
2051 len = rtm_msg2(RTM_NEWADDR, RTM_VERSION, &info, 0, w);
2052 if (w->w_where && w->w_tmem &&
2053 w->w_needed <= w->w_given) {
2054 struct ifa_msghdr *ifam;
2055
2056 ifam = (struct ifa_msghdr *)w->w_tmem;
2057 ifam->ifam_index = ifa->ifa_ifp->if_index;
2058 ifam->ifam_flags = ifa->ifa_flags;
2059 ifam->ifam_metric = ifa->ifa_metric;
2060 ifam->ifam_addrs = info.rti_addrs;
2061 error = copyout(w->w_tmem, w->w_where, len);
2062 if (error)
2063 return (error);
2064 w->w_where += len;
2065 }
2066 }
2067 info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
2068 info.rti_info[RTAX_BRD] = NULL;
2069 }
2070 return (0);
2071 }
2072
2073 int
2074 sysctl_ifnames(struct walkarg *w)
2075 {
2076 struct if_nameindex_msg ifn;
2077 struct ifnet *ifp;
2078 int error = 0;
2079
2080 /* XXX ignore tableid for now */
2081 TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
2082 if (w->w_arg && w->w_arg != ifp->if_index)
2083 continue;
2084 w->w_needed += sizeof(ifn);
2085 if (w->w_where && w->w_needed <= w->w_given) {
2086
2087 memset(&ifn, 0, sizeof(ifn));
2088 ifn.if_index = ifp->if_index;
2089 strlcpy(ifn.if_name, ifp->if_xname,
2090 sizeof(ifn.if_name));
2091 error = copyout(&ifn, w->w_where, sizeof(ifn));
2092 if (error)
2093 return (error);
2094 w->w_where += sizeof(ifn);
2095 }
2096 }
2097
2098 return (0);
2099 }
2100
2101 int
2102 sysctl_source(int af, u_int tableid, struct walkarg *w)
2103 {
2104 struct sockaddr *sa;
2105 int size, error = 0;
2106
2107 sa = rtable_getsource(tableid, af);
2108 if (sa) {
2109 switch (sa->sa_family) {
2110 case AF_INET:
2111 size = sizeof(struct sockaddr_in);
2112 break;
2113 #ifdef INET6
2114 case AF_INET6:
2115 size = sizeof(struct sockaddr_in6);
2116 break;
2117 #endif
2118 default:
2119 return (0);
2120 }
2121 w->w_needed += size;
2122 if (w->w_where && w->w_needed <= w->w_given) {
2123 if ((error = copyout(sa, w->w_where, size)))
2124 return (error);
2125 w->w_where += size;
2126 }
2127 }
2128 return (0);
2129 }
2130
2131 int
2132 sysctl_rtable(int *name, u_int namelen, void *where, size_t *given, void *new,
2133 size_t newlen)
2134 {
2135 int i, error = EINVAL;
2136 u_char af;
2137 struct walkarg w;
2138 struct rt_tableinfo tableinfo;
2139 u_int tableid = 0;
2140
2141 if (new)
2142 return (EPERM);
2143 if (namelen < 3 || namelen > 4)
2144 return (EINVAL);
2145 af = name[0];
2146 bzero(&w, sizeof(w));
2147 w.w_where = where;
2148 w.w_given = *given;
2149 w.w_op = name[1];
2150 w.w_arg = name[2];
2151
2152 if (namelen == 4) {
2153 tableid = name[3];
2154 if (!rtable_exists(tableid))
2155 return (ENOENT);
2156 } else
2157 tableid = curproc->p_p->ps_rtableid;
2158
2159 switch (w.w_op) {
2160 case NET_RT_DUMP:
2161 case NET_RT_FLAGS:
2162 NET_LOCK();
2163 for (i = 1; i <= AF_MAX; i++) {
2164 if (af != 0 && af != i)
2165 continue;
2166
2167 error = rtable_walk(tableid, i, NULL, sysctl_dumpentry,
2168 &w);
2169 if (error == EAFNOSUPPORT)
2170 error = 0;
2171 if (error)
2172 break;
2173 }
2174 NET_UNLOCK();
2175 break;
2176
2177 case NET_RT_IFLIST:
2178 NET_LOCK();
2179 error = sysctl_iflist(af, &w);
2180 NET_UNLOCK();
2181 break;
2182
2183 case NET_RT_STATS:
2184 return (sysctl_rtable_rtstat(where, given, new));
2185 case NET_RT_TABLE:
2186 tableid = w.w_arg;
2187 if (!rtable_exists(tableid))
2188 return (ENOENT);
2189 memset(&tableinfo, 0, sizeof tableinfo);
2190 tableinfo.rti_tableid = tableid;
2191 tableinfo.rti_domainid = rtable_l2(tableid);
2192 error = sysctl_rdstruct(where, given, new,
2193 &tableinfo, sizeof(tableinfo));
2194 return (error);
2195 case NET_RT_IFNAMES:
2196 NET_LOCK();
2197 error = sysctl_ifnames(&w);
2198 NET_UNLOCK();
2199 break;
2200 case NET_RT_SOURCE:
2201 tableid = w.w_arg;
2202 if (!rtable_exists(tableid))
2203 return (ENOENT);
2204 NET_LOCK();
2205 for (i = 1; i <= AF_MAX; i++) {
2206 if (af != 0 && af != i)
2207 continue;
2208
2209 error = sysctl_source(i, tableid, &w);
2210 if (error == EAFNOSUPPORT)
2211 error = 0;
2212 if (error)
2213 break;
2214 }
2215 NET_UNLOCK();
2216 break;
2217 }
2218 free(w.w_tmem, M_RTABLE, w.w_tmemsize);
2219 if (where) {
2220 *given = w.w_where - (caddr_t)where;
2221 if (w.w_needed > w.w_given)
2222 return (ENOMEM);
2223 } else if (w.w_needed == 0) {
2224 *given = 0;
2225 } else {
2226 *given = roundup(w.w_needed + MAX(w.w_needed / 10, 1024),
2227 PAGE_SIZE);
2228 }
2229 return (error);
2230 }
2231
2232 int
2233 sysctl_rtable_rtstat(void *oldp, size_t *oldlenp, void *newp)
2234 {
2235 extern struct cpumem *rtcounters;
2236 uint64_t counters[rts_ncounters];
2237 struct rtstat rtstat;
2238 uint32_t *words = (uint32_t *)&rtstat;
2239 int i;
2240
2241 CTASSERT(sizeof(rtstat) == (nitems(counters) * sizeof(uint32_t)));
2242 memset(&rtstat, 0, sizeof rtstat);
2243 counters_read(rtcounters, counters, nitems(counters));
2244
2245 for (i = 0; i < nitems(counters); i++)
2246 words[i] = (uint32_t)counters[i];
2247
2248 return (sysctl_rdstruct(oldp, oldlenp, newp, &rtstat, sizeof(rtstat)));
2249 }
2250
2251 int
2252 rtm_validate_proposal(struct rt_addrinfo *info)
2253 {
2254 if (info->rti_addrs & ~(RTA_NETMASK | RTA_IFA | RTA_DNS | RTA_STATIC |
2255 RTA_SEARCH)) {
2256 return -1;
2257 }
2258
2259 if (ISSET(info->rti_addrs, RTA_NETMASK)) {
2260 struct sockaddr *sa = info->rti_info[RTAX_NETMASK];
2261 if (sa == NULL)
2262 return -1;
2263 switch (sa->sa_family) {
2264 case AF_INET:
2265 if (sa->sa_len != sizeof(struct sockaddr_in))
2266 return -1;
2267 break;
2268 case AF_INET6:
2269 if (sa->sa_len != sizeof(struct sockaddr_in6))
2270 return -1;
2271 break;
2272 default:
2273 return -1;
2274 }
2275 }
2276
2277 if (ISSET(info->rti_addrs, RTA_IFA)) {
2278 struct sockaddr *sa = info->rti_info[RTAX_IFA];
2279 if (sa == NULL)
2280 return -1;
2281 switch (sa->sa_family) {
2282 case AF_INET:
2283 if (sa->sa_len != sizeof(struct sockaddr_in))
2284 return -1;
2285 break;
2286 case AF_INET6:
2287 if (sa->sa_len != sizeof(struct sockaddr_in6))
2288 return -1;
2289 break;
2290 default:
2291 return -1;
2292 }
2293 }
2294
2295 if (ISSET(info->rti_addrs, RTA_DNS)) {
2296 struct sockaddr_rtdns *rtdns =
2297 (struct sockaddr_rtdns *)info->rti_info[RTAX_DNS];
2298 if (rtdns == NULL)
2299 return -1;
2300 if (rtdns->sr_len > sizeof(*rtdns))
2301 return -1;
2302 if (rtdns->sr_len < offsetof(struct sockaddr_rtdns, sr_dns))
2303 return -1;
2304 switch (rtdns->sr_family) {
2305 case AF_INET:
2306 if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns,
2307 sr_dns)) % sizeof(struct in_addr) != 0)
2308 return -1;
2309 break;
2310 #ifdef INET6
2311 case AF_INET6:
2312 if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns,
2313 sr_dns)) % sizeof(struct in6_addr) != 0)
2314 return -1;
2315 break;
2316 #endif
2317 default:
2318 return -1;
2319 }
2320 }
2321
2322 if (ISSET(info->rti_addrs, RTA_STATIC)) {
2323 struct sockaddr_rtstatic *rtstatic =
2324 (struct sockaddr_rtstatic *)info->rti_info[RTAX_STATIC];
2325 if (rtstatic == NULL)
2326 return -1;
2327 if (rtstatic->sr_len > sizeof(*rtstatic))
2328 return -1;
2329 if (rtstatic->sr_len <=
2330 offsetof(struct sockaddr_rtstatic, sr_static))
2331 return -1;
2332 }
2333
2334 if (ISSET(info->rti_addrs, RTA_SEARCH)) {
2335 struct sockaddr_rtsearch *rtsearch =
2336 (struct sockaddr_rtsearch *)info->rti_info[RTAX_SEARCH];
2337 if (rtsearch == NULL)
2338 return -1;
2339 if (rtsearch->sr_len > sizeof(*rtsearch))
2340 return -1;
2341 if (rtsearch->sr_len <=
2342 offsetof(struct sockaddr_rtsearch, sr_search))
2343 return -1;
2344 }
2345
2346 return 0;
2347 }
2348
2349 int
2350 rt_setsource(unsigned int rtableid, struct sockaddr *src)
2351 {
2352 struct ifaddr *ifa;
2353 int error;
2354 /*
2355 * If source address is 0.0.0.0 or ::
2356 * use automatic source selection
2357 */
2358 switch(src->sa_family) {
2359 case AF_INET:
2360 if(satosin(src)->sin_addr.s_addr == INADDR_ANY) {
2361 rtable_setsource(rtableid, AF_INET, NULL);
2362 return (0);
2363 }
2364 break;
2365 #ifdef INET6
2366 case AF_INET6:
2367 if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)) {
2368 rtable_setsource(rtableid, AF_INET6, NULL);
2369 return (0);
2370 }
2371 break;
2372 #endif
2373 default:
2374 return (EAFNOSUPPORT);
2375 }
2376
2377 KERNEL_LOCK();
2378 /*
2379 * Check if source address is assigned to an interface in the
2380 * same rdomain
2381 */
2382 if ((ifa = ifa_ifwithaddr(src, rtableid)) == NULL) {
2383 KERNEL_UNLOCK();
2384 return (EINVAL);
2385 }
2386
2387 error = rtable_setsource(rtableid, src->sa_family, ifa->ifa_addr);
2388 KERNEL_UNLOCK();
2389
2390 return (error);
2391 }
2392
2393 /*
2394 * Definitions of protocols supported in the ROUTE domain.
2395 */
2396
2397 const struct pr_usrreqs route_usrreqs = {
2398 .pru_attach = route_attach,
2399 .pru_detach = route_detach,
2400 .pru_disconnect = route_disconnect,
2401 .pru_shutdown = route_shutdown,
2402 .pru_rcvd = route_rcvd,
2403 .pru_send = route_send,
2404 .pru_sockaddr = route_sockaddr,
2405 .pru_peeraddr = route_peeraddr,
2406 };
2407
2408 const struct protosw routesw[] = {
2409 {
2410 .pr_type = SOCK_RAW,
2411 .pr_domain = &routedomain,
2412 .pr_flags = PR_ATOMIC|PR_ADDR|PR_WANTRCVD,
2413 .pr_ctloutput = route_ctloutput,
2414 .pr_usrreqs = &route_usrreqs,
2415 .pr_init = route_prinit,
2416 .pr_sysctl = sysctl_rtable
2417 }
2418 };
2419
2420 const struct domain routedomain = {
2421 .dom_family = PF_ROUTE,
2422 .dom_name = "route",
2423 .dom_init = route_init,
2424 .dom_protosw = routesw,
2425 .dom_protoswNPROTOSW = &routesw[nitems(routesw)]
2426 };
Cache object: f077cf0d9fdd6e414ef14724dd100efe
|