FreeBSD/Linux Kernel Cross Reference
sys/net/rtsock.c
1 /*-
2 * Copyright (c) 1988, 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * @(#)rtsock.c 8.7 (Berkeley) 10/12/95
30 * $FreeBSD: releng/6.1/sys/net/rtsock.c 157504 2006-04-04 20:07:23Z andre $
31 */
32
33 #include <sys/param.h>
34 #include <sys/domain.h>
35 #include <sys/kernel.h>
36 #include <sys/jail.h>
37 #include <sys/malloc.h>
38 #include <sys/mbuf.h>
39 #include <sys/proc.h>
40 #include <sys/protosw.h>
41 #include <sys/signalvar.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
44 #include <sys/sysctl.h>
45 #include <sys/systm.h>
46
47 #include <net/if.h>
48 #include <net/netisr.h>
49 #include <net/raw_cb.h>
50 #include <net/route.h>
51
52 #include <netinet/in.h>
53
54 MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
55
56 /* NB: these are not modified */
57 static struct sockaddr route_dst = { 2, PF_ROUTE, };
58 static struct sockaddr route_src = { 2, PF_ROUTE, };
59 static struct sockaddr sa_zero = { sizeof(sa_zero), AF_INET, };
60
/*
 * Number of routing sockets currently attached, broken down by the
 * protocol they were opened with.  rts_attach() and rts_detach() update
 * these counters while holding RTSOCK_LOCK().
 */
static struct {
	int	ip_count;	/* attached w/ AF_INET */
	int	ip6_count;	/* attached w/ AF_INET6 */
	int	ipx_count;	/* attached w/ AF_IPX */
	int	any_count;	/* total attached */
} route_cb;
67
68 struct mtx rtsock_mtx;
69 MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF);
70
71 #define RTSOCK_LOCK() mtx_lock(&rtsock_mtx)
72 #define RTSOCK_UNLOCK() mtx_unlock(&rtsock_mtx)
73 #define RTSOCK_LOCK_ASSERT() mtx_assert(&rtsock_mtx, MA_OWNED)
74
75 static struct ifqueue rtsintrq;
76
77 SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RD, 0, "");
78 SYSCTL_INT(_net_route, OID_AUTO, netisr_maxqlen, CTLFLAG_RW,
79 &rtsintrq.ifq_maxlen, 0, "maximum routing socket dispatch queue length");
80
81 struct walkarg {
82 int w_tmemsize;
83 int w_op, w_arg;
84 caddr_t w_tmem;
85 struct sysctl_req *w_req;
86 };
87
88 static void rts_input(struct mbuf *m);
89 static struct mbuf *rt_msg1(int type, struct rt_addrinfo *rtinfo);
90 static int rt_msg2(int type, struct rt_addrinfo *rtinfo,
91 caddr_t cp, struct walkarg *w);
92 static int rt_xaddrs(caddr_t cp, caddr_t cplim,
93 struct rt_addrinfo *rtinfo);
94 static int sysctl_dumpentry(struct radix_node *rn, void *vw);
95 static int sysctl_iflist(int af, struct walkarg *w);
96 static int sysctl_ifmalist(int af, struct walkarg *w);
97 static int route_output(struct mbuf *m, struct socket *so);
98 static void rt_setmetrics(u_long which, const struct rt_metrics *in,
99 struct rt_metrics_lite *out);
100 static void rt_getmetrics(const struct rt_metrics_lite *in,
101 struct rt_metrics *out);
102 static void rt_dispatch(struct mbuf *, const struct sockaddr *);
103
104 static void
105 rts_init(void)
106 {
107 int tmp;
108
109 rtsintrq.ifq_maxlen = 256;
110 if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp))
111 rtsintrq.ifq_maxlen = tmp;
112 mtx_init(&rtsintrq.ifq_mtx, "rts_inq", NULL, MTX_DEF);
113 netisr_register(NETISR_ROUTE, rts_input, &rtsintrq, NETISR_MPSAFE);
114 }
115 SYSINIT(rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rts_init, 0)
116
117 static void
118 rts_input(struct mbuf *m)
119 {
120 struct sockproto route_proto;
121 unsigned short *family;
122 struct m_tag *tag;
123
124 route_proto.sp_family = PF_ROUTE;
125 tag = m_tag_find(m, PACKET_TAG_RTSOCKFAM, NULL);
126 if (tag != NULL) {
127 family = (unsigned short *)(tag + 1);
128 route_proto.sp_protocol = *family;
129 m_tag_delete(m, tag);
130 } else
131 route_proto.sp_protocol = 0;
132
133 raw_input(m, &route_proto, &route_src, &route_dst);
134 }
135
136 /*
137 * It really doesn't make any sense at all for this code to share much
138 * with raw_usrreq.c, since its functionality is so restricted. XXX
139 */
140 static int
141 rts_abort(struct socket *so)
142 {
143
144 return (raw_usrreqs.pru_abort(so));
145 }
146
147 /* pru_accept is EOPNOTSUPP */
148
149 static int
150 rts_attach(struct socket *so, int proto, struct thread *td)
151 {
152 struct rawcb *rp;
153 int s, error;
154
155 if (sotorawcb(so) != NULL)
156 return EISCONN; /* XXX panic? */
157 /* XXX */
158 MALLOC(rp, struct rawcb *, sizeof *rp, M_PCB, M_WAITOK | M_ZERO);
159 if (rp == NULL)
160 return ENOBUFS;
161
162 /*
163 * The splnet() is necessary to block protocols from sending
164 * error notifications (like RTM_REDIRECT or RTM_LOSING) while
165 * this PCB is extant but incompletely initialized.
166 * Probably we should try to do more of this work beforehand and
167 * eliminate the spl.
168 */
169 s = splnet();
170 so->so_pcb = (caddr_t)rp;
171 error = raw_attach(so, proto);
172 rp = sotorawcb(so);
173 if (error) {
174 splx(s);
175 so->so_pcb = NULL;
176 free(rp, M_PCB);
177 return error;
178 }
179 RTSOCK_LOCK();
180 switch(rp->rcb_proto.sp_protocol) {
181 case AF_INET:
182 route_cb.ip_count++;
183 break;
184 case AF_INET6:
185 route_cb.ip6_count++;
186 break;
187 case AF_IPX:
188 route_cb.ipx_count++;
189 break;
190 }
191 rp->rcb_faddr = &route_src;
192 route_cb.any_count++;
193 RTSOCK_UNLOCK();
194 soisconnected(so);
195 so->so_options |= SO_USELOOPBACK;
196 splx(s);
197 return 0;
198 }
199
200 static int
201 rts_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
202 {
203
204 return (raw_usrreqs.pru_bind(so, nam, td)); /* xxx just EINVAL */
205 }
206
207 static int
208 rts_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
209 {
210
211 return (raw_usrreqs.pru_connect(so, nam, td)); /* XXX just EINVAL */
212 }
213
214 /* pru_connect2 is EOPNOTSUPP */
215 /* pru_control is EOPNOTSUPP */
216
217 static int
218 rts_detach(struct socket *so)
219 {
220 struct rawcb *rp = sotorawcb(so);
221 int s, error;
222
223 s = splnet();
224 if (rp != NULL) {
225 RTSOCK_LOCK();
226 switch(rp->rcb_proto.sp_protocol) {
227 case AF_INET:
228 route_cb.ip_count--;
229 break;
230 case AF_INET6:
231 route_cb.ip6_count--;
232 break;
233 case AF_IPX:
234 route_cb.ipx_count--;
235 break;
236 }
237 route_cb.any_count--;
238 RTSOCK_UNLOCK();
239 }
240 error = raw_usrreqs.pru_detach(so);
241 splx(s);
242 return error;
243 }
244
245 static int
246 rts_disconnect(struct socket *so)
247 {
248
249 return (raw_usrreqs.pru_disconnect(so));
250 }
251
252 /* pru_listen is EOPNOTSUPP */
253
254 static int
255 rts_peeraddr(struct socket *so, struct sockaddr **nam)
256 {
257
258 return (raw_usrreqs.pru_peeraddr(so, nam));
259 }
260
261 /* pru_rcvd is EOPNOTSUPP */
262 /* pru_rcvoob is EOPNOTSUPP */
263
264 static int
265 rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
266 struct mbuf *control, struct thread *td)
267 {
268
269 return (raw_usrreqs.pru_send(so, flags, m, nam, control, td));
270 }
271
272 /* pru_sense is null */
273
274 static int
275 rts_shutdown(struct socket *so)
276 {
277
278 return (raw_usrreqs.pru_shutdown(so));
279 }
280
281 static int
282 rts_sockaddr(struct socket *so, struct sockaddr **nam)
283 {
284
285 return (raw_usrreqs.pru_sockaddr(so, nam));
286 }
287
288 static struct pr_usrreqs route_usrreqs = {
289 .pru_abort = rts_abort,
290 .pru_attach = rts_attach,
291 .pru_bind = rts_bind,
292 .pru_connect = rts_connect,
293 .pru_detach = rts_detach,
294 .pru_disconnect = rts_disconnect,
295 .pru_peeraddr = rts_peeraddr,
296 .pru_send = rts_send,
297 .pru_shutdown = rts_shutdown,
298 .pru_sockaddr = rts_sockaddr,
299 };
300
301 /*ARGSUSED*/
302 static int
303 route_output(struct mbuf *m, struct socket *so)
304 {
305 #define sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0)
306 struct rt_msghdr *rtm = NULL;
307 struct rtentry *rt = NULL;
308 struct radix_node_head *rnh;
309 struct rt_addrinfo info;
310 int len, error = 0;
311 struct ifnet *ifp = NULL;
312 struct ifaddr *ifa = NULL;
313 struct sockaddr_in jail;
314
315 #define senderr(e) { error = e; goto flush;}
316 if (m == NULL || ((m->m_len < sizeof(long)) &&
317 (m = m_pullup(m, sizeof(long))) == NULL))
318 return (ENOBUFS);
319 if ((m->m_flags & M_PKTHDR) == 0)
320 panic("route_output");
321 len = m->m_pkthdr.len;
322 if (len < sizeof(*rtm) ||
323 len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
324 info.rti_info[RTAX_DST] = NULL;
325 senderr(EINVAL);
326 }
327 R_Malloc(rtm, struct rt_msghdr *, len);
328 if (rtm == NULL) {
329 info.rti_info[RTAX_DST] = NULL;
330 senderr(ENOBUFS);
331 }
332 m_copydata(m, 0, len, (caddr_t)rtm);
333 if (rtm->rtm_version != RTM_VERSION) {
334 info.rti_info[RTAX_DST] = NULL;
335 senderr(EPROTONOSUPPORT);
336 }
337 rtm->rtm_pid = curproc->p_pid;
338 bzero(&info, sizeof(info));
339 info.rti_addrs = rtm->rtm_addrs;
340 if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) {
341 info.rti_info[RTAX_DST] = NULL;
342 senderr(EINVAL);
343 }
344 info.rti_flags = rtm->rtm_flags;
345 if (info.rti_info[RTAX_DST] == NULL ||
346 info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
347 (info.rti_info[RTAX_GATEWAY] != NULL &&
348 info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX))
349 senderr(EINVAL);
350 if (info.rti_info[RTAX_GENMASK]) {
351 struct radix_node *t;
352 t = rn_addmask((caddr_t) info.rti_info[RTAX_GENMASK], 0, 1);
353 if (t != NULL &&
354 bcmp((char *)(void *)info.rti_info[RTAX_GENMASK] + 1,
355 (char *)(void *)t->rn_key + 1,
356 ((struct sockaddr *)t->rn_key)->sa_len - 1) == 0)
357 info.rti_info[RTAX_GENMASK] =
358 (struct sockaddr *)t->rn_key;
359 else
360 senderr(ENOBUFS);
361 }
362
363 /*
364 * Verify that the caller has the appropriate privilege; RTM_GET
365 * is the only operation the non-superuser is allowed.
366 */
367 if (rtm->rtm_type != RTM_GET && (error = suser(curthread)) != 0)
368 senderr(error);
369
370 switch (rtm->rtm_type) {
371 struct rtentry *saved_nrt;
372
373 case RTM_ADD:
374 if (info.rti_info[RTAX_GATEWAY] == NULL)
375 senderr(EINVAL);
376 saved_nrt = NULL;
377 error = rtrequest1(RTM_ADD, &info, &saved_nrt);
378 if (error == 0 && saved_nrt) {
379 RT_LOCK(saved_nrt);
380 rt_setmetrics(rtm->rtm_inits,
381 &rtm->rtm_rmx, &saved_nrt->rt_rmx);
382 rtm->rtm_index = saved_nrt->rt_ifp->if_index;
383 RT_REMREF(saved_nrt);
384 saved_nrt->rt_genmask = info.rti_info[RTAX_GENMASK];
385 RT_UNLOCK(saved_nrt);
386 }
387 break;
388
389 case RTM_DELETE:
390 saved_nrt = NULL;
391 error = rtrequest1(RTM_DELETE, &info, &saved_nrt);
392 if (error == 0) {
393 RT_LOCK(saved_nrt);
394 rt = saved_nrt;
395 goto report;
396 }
397 break;
398
399 case RTM_GET:
400 case RTM_CHANGE:
401 case RTM_LOCK:
402 rnh = rt_tables[info.rti_info[RTAX_DST]->sa_family];
403 if (rnh == NULL)
404 senderr(EAFNOSUPPORT);
405 RADIX_NODE_HEAD_LOCK(rnh);
406 rt = (struct rtentry *) rnh->rnh_lookup(info.rti_info[RTAX_DST],
407 info.rti_info[RTAX_NETMASK], rnh);
408 if (rt == NULL) { /* XXX looks bogus */
409 RADIX_NODE_HEAD_UNLOCK(rnh);
410 senderr(ESRCH);
411 }
412 RT_LOCK(rt);
413 RT_ADDREF(rt);
414 RADIX_NODE_HEAD_UNLOCK(rnh);
415
416 switch(rtm->rtm_type) {
417
418 case RTM_GET:
419 report:
420 RT_LOCK_ASSERT(rt);
421 info.rti_info[RTAX_DST] = rt_key(rt);
422 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
423 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
424 info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
425 if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
426 ifp = rt->rt_ifp;
427 if (ifp) {
428 info.rti_info[RTAX_IFP] =
429 ifaddr_byindex(ifp->if_index)->ifa_addr;
430 if (jailed(so->so_cred)) {
431 bzero(&jail, sizeof(jail));
432 jail.sin_family = PF_INET;
433 jail.sin_len = sizeof(jail);
434 jail.sin_addr.s_addr =
435 htonl(prison_getip(so->so_cred));
436 info.rti_info[RTAX_IFA] =
437 (struct sockaddr *)&jail;
438 } else
439 info.rti_info[RTAX_IFA] =
440 rt->rt_ifa->ifa_addr;
441 if (ifp->if_flags & IFF_POINTOPOINT)
442 info.rti_info[RTAX_BRD] =
443 rt->rt_ifa->ifa_dstaddr;
444 rtm->rtm_index = ifp->if_index;
445 } else {
446 info.rti_info[RTAX_IFP] = NULL;
447 info.rti_info[RTAX_IFA] = NULL;
448 }
449 } else if ((ifp = rt->rt_ifp) != NULL) {
450 rtm->rtm_index = ifp->if_index;
451 }
452 len = rt_msg2(rtm->rtm_type, &info, NULL, NULL);
453 if (len > rtm->rtm_msglen) {
454 struct rt_msghdr *new_rtm;
455 R_Malloc(new_rtm, struct rt_msghdr *, len);
456 if (new_rtm == NULL) {
457 RT_UNLOCK(rt);
458 senderr(ENOBUFS);
459 }
460 bcopy(rtm, new_rtm, rtm->rtm_msglen);
461 Free(rtm); rtm = new_rtm;
462 }
463 (void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, NULL);
464 rtm->rtm_flags = rt->rt_flags;
465 rtm->rtm_use = 0;
466 rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
467 rtm->rtm_addrs = info.rti_addrs;
468 break;
469
470 case RTM_CHANGE:
471 /*
472 * New gateway could require new ifaddr, ifp;
473 * flags may also be different; ifp may be specified
474 * by ll sockaddr when protocol address is ambiguous
475 */
476 if (((rt->rt_flags & RTF_GATEWAY) &&
477 info.rti_info[RTAX_GATEWAY] != NULL) ||
478 info.rti_info[RTAX_IFP] != NULL ||
479 (info.rti_info[RTAX_IFA] != NULL &&
480 !sa_equal(info.rti_info[RTAX_IFA],
481 rt->rt_ifa->ifa_addr))) {
482 RT_UNLOCK(rt);
483 if ((error = rt_getifa(&info)) != 0)
484 senderr(error);
485 RT_LOCK(rt);
486 }
487 if (info.rti_info[RTAX_GATEWAY] != NULL &&
488 (error = rt_setgate(rt, rt_key(rt),
489 info.rti_info[RTAX_GATEWAY])) != 0) {
490 RT_UNLOCK(rt);
491 senderr(error);
492 }
493 if ((ifa = info.rti_ifa) != NULL) {
494 struct ifaddr *oifa = rt->rt_ifa;
495 if (oifa != ifa) {
496 if (oifa) {
497 if (oifa->ifa_rtrequest)
498 oifa->ifa_rtrequest(
499 RTM_DELETE, rt,
500 &info);
501 IFAFREE(oifa);
502 }
503 IFAREF(ifa);
504 rt->rt_ifa = ifa;
505 rt->rt_ifp = info.rti_ifp;
506 }
507 }
508 /* Allow some flags to be toggled on change. */
509 if (rtm->rtm_fmask & RTF_FMASK)
510 rt->rt_flags = (rt->rt_flags &
511 ~rtm->rtm_fmask) |
512 (rtm->rtm_flags & rtm->rtm_fmask);
513 rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
514 &rt->rt_rmx);
515 rtm->rtm_index = rt->rt_ifp->if_index;
516 if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
517 rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info);
518 if (info.rti_info[RTAX_GENMASK])
519 rt->rt_genmask = info.rti_info[RTAX_GENMASK];
520 /* FALLTHROUGH */
521 case RTM_LOCK:
522 /* We don't support locks anymore */
523 break;
524 }
525 RT_UNLOCK(rt);
526 break;
527
528 default:
529 senderr(EOPNOTSUPP);
530 }
531
532 flush:
533 if (rtm) {
534 if (error)
535 rtm->rtm_errno = error;
536 else
537 rtm->rtm_flags |= RTF_DONE;
538 }
539 if (rt) /* XXX can this be true? */
540 RTFREE(rt);
541 {
542 struct rawcb *rp = NULL;
543 /*
544 * Check to see if we don't want our own messages.
545 */
546 if ((so->so_options & SO_USELOOPBACK) == 0) {
547 if (route_cb.any_count <= 1) {
548 if (rtm)
549 Free(rtm);
550 m_freem(m);
551 return (error);
552 }
553 /* There is another listener, so construct message */
554 rp = sotorawcb(so);
555 }
556 if (rtm) {
557 m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
558 if (m->m_pkthdr.len < rtm->rtm_msglen) {
559 m_freem(m);
560 m = NULL;
561 } else if (m->m_pkthdr.len > rtm->rtm_msglen)
562 m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
563 Free(rtm);
564 }
565 if (m) {
566 if (rp) {
567 /*
568 * XXX insure we don't get a copy by
569 * invalidating our protocol
570 */
571 unsigned short family = rp->rcb_proto.sp_family;
572 rp->rcb_proto.sp_family = 0;
573 rt_dispatch(m, info.rti_info[RTAX_DST]);
574 rp->rcb_proto.sp_family = family;
575 } else
576 rt_dispatch(m, info.rti_info[RTAX_DST]);
577 }
578 }
579 return (error);
580 #undef sa_equal
581 }
582
583 static void
584 rt_setmetrics(u_long which, const struct rt_metrics *in,
585 struct rt_metrics_lite *out)
586 {
587 #define metric(f, e) if (which & (f)) out->e = in->e;
588 /*
589 * Only these are stored in the routing entry since introduction
590 * of tcp hostcache. The rest is ignored.
591 */
592 metric(RTV_MTU, rmx_mtu);
593 metric(RTV_EXPIRE, rmx_expire);
594 #undef metric
595 }
596
597 static void
598 rt_getmetrics(const struct rt_metrics_lite *in, struct rt_metrics *out)
599 {
600 #define metric(e) out->e = in->e;
601 bzero(out, sizeof(*out));
602 metric(rmx_mtu);
603 metric(rmx_expire);
604 #undef metric
605 }
606
607 /*
608 * Extract the addresses of the passed sockaddrs.
609 * Do a little sanity checking so as to avoid bad memory references.
610 * This data is derived straight from userland.
611 */
612 static int
613 rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
614 {
615 struct sockaddr *sa;
616 int i;
617
618 for (i = 0; i < RTAX_MAX && cp < cplim; i++) {
619 if ((rtinfo->rti_addrs & (1 << i)) == 0)
620 continue;
621 sa = (struct sockaddr *)cp;
622 /*
623 * It won't fit.
624 */
625 if (cp + sa->sa_len > cplim)
626 return (EINVAL);
627 /*
628 * there are no more.. quit now
629 * If there are more bits, they are in error.
630 * I've seen this. route(1) can evidently generate these.
631 * This causes kernel to core dump.
632 * for compatibility, If we see this, point to a safe address.
633 */
634 if (sa->sa_len == 0) {
635 rtinfo->rti_info[i] = &sa_zero;
636 return (0); /* should be EINVAL but for compat */
637 }
638 /* accept it */
639 rtinfo->rti_info[i] = sa;
640 cp += SA_SIZE(sa);
641 }
642 return (0);
643 }
644
645 static struct mbuf *
646 rt_msg1(int type, struct rt_addrinfo *rtinfo)
647 {
648 struct rt_msghdr *rtm;
649 struct mbuf *m;
650 int i;
651 struct sockaddr *sa;
652 int len, dlen;
653
654 switch (type) {
655
656 case RTM_DELADDR:
657 case RTM_NEWADDR:
658 len = sizeof(struct ifa_msghdr);
659 break;
660
661 case RTM_DELMADDR:
662 case RTM_NEWMADDR:
663 len = sizeof(struct ifma_msghdr);
664 break;
665
666 case RTM_IFINFO:
667 len = sizeof(struct if_msghdr);
668 break;
669
670 case RTM_IFANNOUNCE:
671 case RTM_IEEE80211:
672 len = sizeof(struct if_announcemsghdr);
673 break;
674
675 default:
676 len = sizeof(struct rt_msghdr);
677 }
678 if (len > MCLBYTES)
679 panic("rt_msg1");
680 m = m_gethdr(M_DONTWAIT, MT_DATA);
681 if (m && len > MHLEN) {
682 MCLGET(m, M_DONTWAIT);
683 if ((m->m_flags & M_EXT) == 0) {
684 m_free(m);
685 m = NULL;
686 }
687 }
688 if (m == NULL)
689 return (m);
690 m->m_pkthdr.len = m->m_len = len;
691 m->m_pkthdr.rcvif = NULL;
692 rtm = mtod(m, struct rt_msghdr *);
693 bzero((caddr_t)rtm, len);
694 for (i = 0; i < RTAX_MAX; i++) {
695 if ((sa = rtinfo->rti_info[i]) == NULL)
696 continue;
697 rtinfo->rti_addrs |= (1 << i);
698 dlen = SA_SIZE(sa);
699 m_copyback(m, len, dlen, (caddr_t)sa);
700 len += dlen;
701 }
702 if (m->m_pkthdr.len != len) {
703 m_freem(m);
704 return (NULL);
705 }
706 rtm->rtm_msglen = len;
707 rtm->rtm_version = RTM_VERSION;
708 rtm->rtm_type = type;
709 return (m);
710 }
711
712 static int
713 rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w)
714 {
715 int i;
716 int len, dlen, second_time = 0;
717 caddr_t cp0;
718
719 rtinfo->rti_addrs = 0;
720 again:
721 switch (type) {
722
723 case RTM_DELADDR:
724 case RTM_NEWADDR:
725 len = sizeof(struct ifa_msghdr);
726 break;
727
728 case RTM_IFINFO:
729 len = sizeof(struct if_msghdr);
730 break;
731
732 case RTM_NEWMADDR:
733 len = sizeof(struct ifma_msghdr);
734 break;
735
736 default:
737 len = sizeof(struct rt_msghdr);
738 }
739 cp0 = cp;
740 if (cp0)
741 cp += len;
742 for (i = 0; i < RTAX_MAX; i++) {
743 struct sockaddr *sa;
744
745 if ((sa = rtinfo->rti_info[i]) == NULL)
746 continue;
747 rtinfo->rti_addrs |= (1 << i);
748 dlen = SA_SIZE(sa);
749 if (cp) {
750 bcopy((caddr_t)sa, cp, (unsigned)dlen);
751 cp += dlen;
752 }
753 len += dlen;
754 }
755 len = ALIGN(len);
756 if (cp == NULL && w != NULL && !second_time) {
757 struct walkarg *rw = w;
758
759 if (rw->w_req) {
760 if (rw->w_tmemsize < len) {
761 if (rw->w_tmem)
762 free(rw->w_tmem, M_RTABLE);
763 rw->w_tmem = (caddr_t)
764 malloc(len, M_RTABLE, M_NOWAIT);
765 if (rw->w_tmem)
766 rw->w_tmemsize = len;
767 }
768 if (rw->w_tmem) {
769 cp = rw->w_tmem;
770 second_time = 1;
771 goto again;
772 }
773 }
774 }
775 if (cp) {
776 struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
777
778 rtm->rtm_version = RTM_VERSION;
779 rtm->rtm_type = type;
780 rtm->rtm_msglen = len;
781 }
782 return (len);
783 }
784
785 /*
786 * This routine is called to generate a message from the routing
787 * socket indicating that a redirect has occured, a routing lookup
788 * has failed, or that a protocol has detected timeouts to a particular
789 * destination.
790 */
791 void
792 rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
793 {
794 struct rt_msghdr *rtm;
795 struct mbuf *m;
796 struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
797
798 if (route_cb.any_count == 0)
799 return;
800 m = rt_msg1(type, rtinfo);
801 if (m == NULL)
802 return;
803 rtm = mtod(m, struct rt_msghdr *);
804 rtm->rtm_flags = RTF_DONE | flags;
805 rtm->rtm_errno = error;
806 rtm->rtm_addrs = rtinfo->rti_addrs;
807 rt_dispatch(m, sa);
808 }
809
810 /*
811 * This routine is called to generate a message from the routing
812 * socket indicating that the status of a network interface has changed.
813 */
814 void
815 rt_ifmsg(struct ifnet *ifp)
816 {
817 struct if_msghdr *ifm;
818 struct mbuf *m;
819 struct rt_addrinfo info;
820
821 if (route_cb.any_count == 0)
822 return;
823 bzero((caddr_t)&info, sizeof(info));
824 m = rt_msg1(RTM_IFINFO, &info);
825 if (m == NULL)
826 return;
827 ifm = mtod(m, struct if_msghdr *);
828 ifm->ifm_index = ifp->if_index;
829 ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
830 ifm->ifm_data = ifp->if_data;
831 ifm->ifm_addrs = 0;
832 rt_dispatch(m, NULL);
833 }
834
835 /*
836 * This is called to generate messages from the routing socket
837 * indicating a network interface has had addresses associated with it.
838 * if we ever reverse the logic and replace messages TO the routing
839 * socket indicate a request to configure interfaces, then it will
840 * be unnecessary as the routing socket will automatically generate
841 * copies of it.
842 */
843 void
844 rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
845 {
846 struct rt_addrinfo info;
847 struct sockaddr *sa = NULL;
848 int pass;
849 struct mbuf *m = NULL;
850 struct ifnet *ifp = ifa->ifa_ifp;
851
852 KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE,
853 ("unexpected cmd %u", cmd));
854
855 if (route_cb.any_count == 0)
856 return;
857 for (pass = 1; pass < 3; pass++) {
858 bzero((caddr_t)&info, sizeof(info));
859 if ((cmd == RTM_ADD && pass == 1) ||
860 (cmd == RTM_DELETE && pass == 2)) {
861 struct ifa_msghdr *ifam;
862 int ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
863
864 info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
865 info.rti_info[RTAX_IFP] =
866 ifaddr_byindex(ifp->if_index)->ifa_addr;
867 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
868 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
869 if ((m = rt_msg1(ncmd, &info)) == NULL)
870 continue;
871 ifam = mtod(m, struct ifa_msghdr *);
872 ifam->ifam_index = ifp->if_index;
873 ifam->ifam_metric = ifa->ifa_metric;
874 ifam->ifam_flags = ifa->ifa_flags;
875 ifam->ifam_addrs = info.rti_addrs;
876 }
877 if ((cmd == RTM_ADD && pass == 2) ||
878 (cmd == RTM_DELETE && pass == 1)) {
879 struct rt_msghdr *rtm;
880
881 if (rt == NULL)
882 continue;
883 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
884 info.rti_info[RTAX_DST] = sa = rt_key(rt);
885 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
886 if ((m = rt_msg1(cmd, &info)) == NULL)
887 continue;
888 rtm = mtod(m, struct rt_msghdr *);
889 rtm->rtm_index = ifp->if_index;
890 rtm->rtm_flags |= rt->rt_flags;
891 rtm->rtm_errno = error;
892 rtm->rtm_addrs = info.rti_addrs;
893 }
894 rt_dispatch(m, sa);
895 }
896 }
897
898 /*
899 * This is the analogue to the rt_newaddrmsg which performs the same
900 * function but for multicast group memberhips. This is easier since
901 * there is no route state to worry about.
902 */
903 void
904 rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma)
905 {
906 struct rt_addrinfo info;
907 struct mbuf *m = NULL;
908 struct ifnet *ifp = ifma->ifma_ifp;
909 struct ifma_msghdr *ifmam;
910
911 if (route_cb.any_count == 0)
912 return;
913
914 bzero((caddr_t)&info, sizeof(info));
915 info.rti_info[RTAX_IFA] = ifma->ifma_addr;
916 info.rti_info[RTAX_IFP] =
917 ifp ? ifaddr_byindex(ifp->if_index)->ifa_addr : NULL;
918 /*
919 * If a link-layer address is present, present it as a ``gateway''
920 * (similarly to how ARP entries, e.g., are presented).
921 */
922 info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr;
923 m = rt_msg1(cmd, &info);
924 if (m == NULL)
925 return;
926 ifmam = mtod(m, struct ifma_msghdr *);
927 ifmam->ifmam_index = ifp->if_index;
928 ifmam->ifmam_addrs = info.rti_addrs;
929 rt_dispatch(m, ifma->ifma_addr);
930 }
931
932 static struct mbuf *
933 rt_makeifannouncemsg(struct ifnet *ifp, int type, int what,
934 struct rt_addrinfo *info)
935 {
936 struct if_announcemsghdr *ifan;
937 struct mbuf *m;
938
939 if (route_cb.any_count == 0)
940 return NULL;
941 bzero((caddr_t)info, sizeof(*info));
942 m = rt_msg1(type, info);
943 if (m != NULL) {
944 ifan = mtod(m, struct if_announcemsghdr *);
945 ifan->ifan_index = ifp->if_index;
946 strlcpy(ifan->ifan_name, ifp->if_xname,
947 sizeof(ifan->ifan_name));
948 ifan->ifan_what = what;
949 }
950 return m;
951 }
952
953 /*
954 * This is called to generate routing socket messages indicating
955 * IEEE80211 wireless events.
956 * XXX we piggyback on the RTM_IFANNOUNCE msg format in a clumsy way.
957 */
958 void
959 rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len)
960 {
961 struct mbuf *m;
962 struct rt_addrinfo info;
963
964 m = rt_makeifannouncemsg(ifp, RTM_IEEE80211, what, &info);
965 if (m != NULL) {
966 /*
967 * Append the ieee80211 data. Try to stick it in the
968 * mbuf containing the ifannounce msg; otherwise allocate
969 * a new mbuf and append.
970 *
971 * NB: we assume m is a single mbuf.
972 */
973 if (data_len > M_TRAILINGSPACE(m)) {
974 struct mbuf *n = m_get(M_NOWAIT, MT_DATA);
975 if (n == NULL) {
976 m_freem(m);
977 return;
978 }
979 bcopy(data, mtod(n, void *), data_len);
980 n->m_len = data_len;
981 m->m_next = n;
982 } else if (data_len > 0) {
983 bcopy(data, mtod(m, u_int8_t *) + m->m_len, data_len);
984 m->m_len += data_len;
985 }
986 if (m->m_flags & M_PKTHDR)
987 m->m_pkthdr.len += data_len;
988 mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len;
989 rt_dispatch(m, NULL);
990 }
991 }
992
993 /*
994 * This is called to generate routing socket messages indicating
995 * network interface arrival and departure.
996 */
997 void
998 rt_ifannouncemsg(struct ifnet *ifp, int what)
999 {
1000 struct mbuf *m;
1001 struct rt_addrinfo info;
1002
1003 m = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &info);
1004 if (m != NULL)
1005 rt_dispatch(m, NULL);
1006 }
1007
1008 static void
1009 rt_dispatch(struct mbuf *m, const struct sockaddr *sa)
1010 {
1011 struct m_tag *tag;
1012
1013 /*
1014 * Preserve the family from the sockaddr, if any, in an m_tag for
1015 * use when injecting the mbuf into the routing socket buffer from
1016 * the netisr.
1017 */
1018 if (sa != NULL) {
1019 tag = m_tag_get(PACKET_TAG_RTSOCKFAM, sizeof(unsigned short),
1020 M_NOWAIT);
1021 if (tag == NULL) {
1022 m_freem(m);
1023 return;
1024 }
1025 *(unsigned short *)(tag + 1) = sa->sa_family;
1026 m_tag_prepend(m, tag);
1027 }
1028 netisr_queue(NETISR_ROUTE, m); /* mbuf is free'd on failure. */
1029 }
1030
1031 /*
1032 * This is used in dumping the kernel table via sysctl().
1033 */
1034 static int
1035 sysctl_dumpentry(struct radix_node *rn, void *vw)
1036 {
1037 struct walkarg *w = vw;
1038 struct rtentry *rt = (struct rtentry *)rn;
1039 int error = 0, size;
1040 struct rt_addrinfo info;
1041
1042 if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
1043 return 0;
1044 bzero((caddr_t)&info, sizeof(info));
1045 info.rti_info[RTAX_DST] = rt_key(rt);
1046 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1047 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1048 info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
1049 if (rt->rt_ifp) {
1050 info.rti_info[RTAX_IFP] =
1051 ifaddr_byindex(rt->rt_ifp->if_index)->ifa_addr;
1052 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1053 if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
1054 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
1055 }
1056 size = rt_msg2(RTM_GET, &info, NULL, w);
1057 if (w->w_req && w->w_tmem) {
1058 struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
1059
1060 rtm->rtm_flags = rt->rt_flags;
1061 rtm->rtm_use = rt->rt_rmx.rmx_pksent;
1062 rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
1063 rtm->rtm_index = rt->rt_ifp->if_index;
1064 rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
1065 rtm->rtm_addrs = info.rti_addrs;
1066 error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
1067 return (error);
1068 }
1069 return (error);
1070 }
1071
1072 static int
1073 sysctl_iflist(int af, struct walkarg *w)
1074 {
1075 struct ifnet *ifp;
1076 struct ifaddr *ifa;
1077 struct rt_addrinfo info;
1078 int len, error = 0;
1079
1080 bzero((caddr_t)&info, sizeof(info));
1081 IFNET_RLOCK();
1082 TAILQ_FOREACH(ifp, &ifnet, if_link) {
1083 if (w->w_arg && w->w_arg != ifp->if_index)
1084 continue;
1085 ifa = ifaddr_byindex(ifp->if_index);
1086 info.rti_info[RTAX_IFP] = ifa->ifa_addr;
1087 len = rt_msg2(RTM_IFINFO, &info, NULL, w);
1088 info.rti_info[RTAX_IFP] = NULL;
1089 if (w->w_req && w->w_tmem) {
1090 struct if_msghdr *ifm;
1091
1092 ifm = (struct if_msghdr *)w->w_tmem;
1093 ifm->ifm_index = ifp->if_index;
1094 ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
1095 ifm->ifm_data = ifp->if_data;
1096 ifm->ifm_addrs = info.rti_addrs;
1097 error = SYSCTL_OUT(w->w_req,(caddr_t)ifm, len);
1098 if (error)
1099 goto done;
1100 }
1101 while ((ifa = TAILQ_NEXT(ifa, ifa_link)) != NULL) {
1102 if (af && af != ifa->ifa_addr->sa_family)
1103 continue;
1104 if (jailed(curthread->td_ucred) &&
1105 prison_if(curthread->td_ucred, ifa->ifa_addr))
1106 continue;
1107 info.rti_info[RTAX_IFA] = ifa->ifa_addr;
1108 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1109 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1110 len = rt_msg2(RTM_NEWADDR, &info, NULL, w);
1111 if (w->w_req && w->w_tmem) {
1112 struct ifa_msghdr *ifam;
1113
1114 ifam = (struct ifa_msghdr *)w->w_tmem;
1115 ifam->ifam_index = ifa->ifa_ifp->if_index;
1116 ifam->ifam_flags = ifa->ifa_flags;
1117 ifam->ifam_metric = ifa->ifa_metric;
1118 ifam->ifam_addrs = info.rti_addrs;
1119 error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
1120 if (error)
1121 goto done;
1122 }
1123 }
1124 info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
1125 info.rti_info[RTAX_BRD] = NULL;
1126 }
1127 done:
1128 IFNET_RUNLOCK();
1129 return (error);
1130 }
1131
1132 int
1133 sysctl_ifmalist(int af, struct walkarg *w)
1134 {
1135 struct ifnet *ifp;
1136 struct ifmultiaddr *ifma;
1137 struct rt_addrinfo info;
1138 int len, error = 0;
1139 struct ifaddr *ifa;
1140
1141 bzero((caddr_t)&info, sizeof(info));
1142 IFNET_RLOCK();
1143 TAILQ_FOREACH(ifp, &ifnet, if_link) {
1144 if (w->w_arg && w->w_arg != ifp->if_index)
1145 continue;
1146 ifa = ifaddr_byindex(ifp->if_index);
1147 info.rti_info[RTAX_IFP] = ifa ? ifa->ifa_addr : NULL;
1148 IF_ADDR_LOCK(ifp);
1149 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1150 if (af && af != ifma->ifma_addr->sa_family)
1151 continue;
1152 if (jailed(curproc->p_ucred) &&
1153 prison_if(curproc->p_ucred, ifma->ifma_addr))
1154 continue;
1155 info.rti_info[RTAX_IFA] = ifma->ifma_addr;
1156 info.rti_info[RTAX_GATEWAY] =
1157 (ifma->ifma_addr->sa_family != AF_LINK) ?
1158 ifma->ifma_lladdr : NULL;
1159 len = rt_msg2(RTM_NEWMADDR, &info, NULL, w);
1160 if (w->w_req && w->w_tmem) {
1161 struct ifma_msghdr *ifmam;
1162
1163 ifmam = (struct ifma_msghdr *)w->w_tmem;
1164 ifmam->ifmam_index = ifma->ifma_ifp->if_index;
1165 ifmam->ifmam_flags = 0;
1166 ifmam->ifmam_addrs = info.rti_addrs;
1167 error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
1168 if (error) {
1169 IF_ADDR_UNLOCK(ifp);
1170 goto done;
1171 }
1172 }
1173 }
1174 IF_ADDR_UNLOCK(ifp);
1175 }
1176 done:
1177 IFNET_RUNLOCK();
1178 return (error);
1179 }
1180
1181 static int
1182 sysctl_rtsock(SYSCTL_HANDLER_ARGS)
1183 {
1184 int *name = (int *)arg1;
1185 u_int namelen = arg2;
1186 struct radix_node_head *rnh;
1187 int i, lim, error = EINVAL;
1188 u_char af;
1189 struct walkarg w;
1190
1191 name ++;
1192 namelen--;
1193 if (req->newptr)
1194 return (EPERM);
1195 if (namelen != 3)
1196 return ((namelen < 3) ? EISDIR : ENOTDIR);
1197 af = name[0];
1198 if (af > AF_MAX)
1199 return (EINVAL);
1200 bzero(&w, sizeof(w));
1201 w.w_op = name[1];
1202 w.w_arg = name[2];
1203 w.w_req = req;
1204
1205 error = sysctl_wire_old_buffer(req, 0);
1206 if (error)
1207 return (error);
1208 switch (w.w_op) {
1209
1210 case NET_RT_DUMP:
1211 case NET_RT_FLAGS:
1212 if (af == 0) { /* dump all tables */
1213 i = 1;
1214 lim = AF_MAX;
1215 } else /* dump only one table */
1216 i = lim = af;
1217 for (error = 0; error == 0 && i <= lim; i++)
1218 if ((rnh = rt_tables[i]) != NULL) {
1219 RADIX_NODE_HEAD_LOCK(rnh);
1220 error = rnh->rnh_walktree(rnh,
1221 sysctl_dumpentry, &w);
1222 RADIX_NODE_HEAD_UNLOCK(rnh);
1223 } else if (af != 0)
1224 error = EAFNOSUPPORT;
1225 break;
1226
1227 case NET_RT_IFLIST:
1228 error = sysctl_iflist(af, &w);
1229 break;
1230
1231 case NET_RT_IFMALIST:
1232 error = sysctl_ifmalist(af, &w);
1233 break;
1234 }
1235 if (w.w_tmem)
1236 free(w.w_tmem, M_RTABLE);
1237 return (error);
1238 }
1239
1240 SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, "");
1241
1242 /*
1243 * Definitions of protocols supported in the ROUTE domain.
1244 */
1245
1246 extern struct domain routedomain; /* or at least forward */
1247
1248 static struct protosw routesw[] = {
1249 {
1250 .pr_type = SOCK_RAW,
1251 .pr_domain = &routedomain,
1252 .pr_flags = PR_ATOMIC|PR_ADDR,
1253 .pr_output = route_output,
1254 .pr_ctlinput = raw_ctlinput,
1255 .pr_init = raw_init,
1256 .pr_usrreqs = &route_usrreqs
1257 }
1258 };
1259
1260 static struct domain routedomain = {
1261 .dom_family = PF_ROUTE,
1262 .dom_name = "route",
1263 .dom_protosw = routesw,
1264 .dom_protoswNPROTOSW = &routesw[sizeof(routesw)/sizeof(routesw[0])]
1265 };
1266
1267 DOMAIN_SET(route);
Cache object: 7e4192d5d37476321c4ac51175cc94fc
|