FreeBSD/Linux Kernel Cross Reference
sys/net/rtsock.c
1 /*
2 * Copyright (c) 1988, 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * @(#)rtsock.c 8.7 (Berkeley) 10/12/95
30 * $FreeBSD: releng/5.3/sys/net/rtsock.c 136588 2004-10-16 08:43:07Z cvs2svn $
31 */
32
33 #include <sys/param.h>
34 #include <sys/domain.h>
35 #include <sys/kernel.h>
36 #include <sys/jail.h>
37 #include <sys/malloc.h>
38 #include <sys/mbuf.h>
39 #include <sys/proc.h>
40 #include <sys/protosw.h>
41 #include <sys/signalvar.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
44 #include <sys/sysctl.h>
45 #include <sys/systm.h>
46
47 #include <net/if.h>
48 #include <net/netisr.h>
49 #include <net/raw_cb.h>
50 #include <net/route.h>
51
52 #include <netinet/in.h>
53
54 MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
55
56 /* NB: these are not modified */
57 static struct sockaddr route_dst = { 2, PF_ROUTE, };
58 static struct sockaddr route_src = { 2, PF_ROUTE, };
59 static struct sockaddr sa_zero = { sizeof(sa_zero), AF_INET, };
60
61 static struct {
62 int ip_count; /* attached w/ AF_INET */
63 int ip6_count; /* attached w/ AF_INET6 */
64 int ipx_count; /* attached w/ AF_IPX */
65 int any_count; /* total attached */
66 } route_cb;
67
68 struct mtx rtsock_mtx;
69 MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF);
70
71 #define RTSOCK_LOCK() mtx_lock(&rtsock_mtx)
72 #define RTSOCK_UNLOCK() mtx_unlock(&rtsock_mtx)
73 #define RTSOCK_LOCK_ASSERT() mtx_assert(&rtsock_mtx, MA_OWNED)
74
75 static struct ifqueue rtsintrq;
76
77 SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RD, 0, "");
78 SYSCTL_INT(_net_route, OID_AUTO, netisr_maxqlen, CTLFLAG_RW,
79 &rtsintrq.ifq_maxlen, 0, "maximum routing socket dispatch queue length");
80
81 struct walkarg {
82 int w_tmemsize;
83 int w_op, w_arg;
84 caddr_t w_tmem;
85 struct sysctl_req *w_req;
86 };
87
88 static void rts_input(struct mbuf *m);
89 static struct mbuf *rt_msg1(int type, struct rt_addrinfo *rtinfo);
90 static int rt_msg2(int type, struct rt_addrinfo *rtinfo,
91 caddr_t cp, struct walkarg *w);
92 static int rt_xaddrs(caddr_t cp, caddr_t cplim,
93 struct rt_addrinfo *rtinfo);
94 static int sysctl_dumpentry(struct radix_node *rn, void *vw);
95 static int sysctl_iflist(int af, struct walkarg *w);
96 static int sysctl_ifmalist(int af, struct walkarg *w);
97 static int route_output(struct mbuf *m, struct socket *so);
98 static void rt_setmetrics(u_long which, const struct rt_metrics *in,
99 struct rt_metrics_lite *out);
100 static void rt_getmetrics(const struct rt_metrics_lite *in,
101 struct rt_metrics *out);
102 static void rt_dispatch(struct mbuf *, const struct sockaddr *);
103
104 static void
105 rts_init(void)
106 {
107 int tmp;
108
109 rtsintrq.ifq_maxlen = 256;
110 if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp))
111 rtsintrq.ifq_maxlen = tmp;
112 mtx_init(&rtsintrq.ifq_mtx, "rts_inq", NULL, MTX_DEF);
113 netisr_register(NETISR_ROUTE, rts_input, &rtsintrq, NETISR_MPSAFE);
114 }
115 SYSINIT(rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rts_init, 0)
116
117 static void
118 rts_input(struct mbuf *m)
119 {
120 struct sockproto route_proto;
121 unsigned short *family;
122 struct m_tag *tag;
123
124 route_proto.sp_family = PF_ROUTE;
125 tag = m_tag_find(m, PACKET_TAG_RTSOCKFAM, NULL);
126 if (tag != NULL) {
127 family = (unsigned short *)(tag + 1);
128 route_proto.sp_protocol = *family;
129 m_tag_delete(m, tag);
130 } else
131 route_proto.sp_protocol = 0;
132
133 raw_input(m, &route_proto, &route_src, &route_dst);
134 }
135
136 /*
137 * It really doesn't make any sense at all for this code to share much
138 * with raw_usrreq.c, since its functionality is so restricted. XXX
139 */
140 static int
141 rts_abort(struct socket *so)
142 {
143 int s, error;
144 s = splnet();
145 error = raw_usrreqs.pru_abort(so);
146 splx(s);
147 return error;
148 }
149
150 /* pru_accept is EOPNOTSUPP */
151
152 static int
153 rts_attach(struct socket *so, int proto, struct thread *td)
154 {
155 struct rawcb *rp;
156 int s, error;
157
158 if (sotorawcb(so) != NULL)
159 return EISCONN; /* XXX panic? */
160 /* XXX */
161 MALLOC(rp, struct rawcb *, sizeof *rp, M_PCB, M_WAITOK | M_ZERO);
162 if (rp == NULL)
163 return ENOBUFS;
164
165 /*
166 * The splnet() is necessary to block protocols from sending
167 * error notifications (like RTM_REDIRECT or RTM_LOSING) while
168 * this PCB is extant but incompletely initialized.
169 * Probably we should try to do more of this work beforehand and
170 * eliminate the spl.
171 */
172 s = splnet();
173 so->so_pcb = (caddr_t)rp;
174 error = raw_attach(so, proto);
175 rp = sotorawcb(so);
176 if (error) {
177 splx(s);
178 so->so_pcb = NULL;
179 free(rp, M_PCB);
180 return error;
181 }
182 RTSOCK_LOCK();
183 switch(rp->rcb_proto.sp_protocol) {
184 case AF_INET:
185 route_cb.ip_count++;
186 break;
187 case AF_INET6:
188 route_cb.ip6_count++;
189 break;
190 case AF_IPX:
191 route_cb.ipx_count++;
192 break;
193 }
194 rp->rcb_faddr = &route_src;
195 route_cb.any_count++;
196 RTSOCK_UNLOCK();
197 soisconnected(so);
198 so->so_options |= SO_USELOOPBACK;
199 splx(s);
200 return 0;
201 }
202
203 static int
204 rts_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
205 {
206 int s, error;
207 s = splnet();
208 error = raw_usrreqs.pru_bind(so, nam, td); /* xxx just EINVAL */
209 splx(s);
210 return error;
211 }
212
213 static int
214 rts_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
215 {
216 int s, error;
217 s = splnet();
218 error = raw_usrreqs.pru_connect(so, nam, td); /* XXX just EINVAL */
219 splx(s);
220 return error;
221 }
222
223 /* pru_connect2 is EOPNOTSUPP */
224 /* pru_control is EOPNOTSUPP */
225
226 static int
227 rts_detach(struct socket *so)
228 {
229 struct rawcb *rp = sotorawcb(so);
230 int s, error;
231
232 s = splnet();
233 if (rp != NULL) {
234 RTSOCK_LOCK();
235 switch(rp->rcb_proto.sp_protocol) {
236 case AF_INET:
237 route_cb.ip_count--;
238 break;
239 case AF_INET6:
240 route_cb.ip6_count--;
241 break;
242 case AF_IPX:
243 route_cb.ipx_count--;
244 break;
245 }
246 route_cb.any_count--;
247 RTSOCK_UNLOCK();
248 }
249 error = raw_usrreqs.pru_detach(so);
250 splx(s);
251 return error;
252 }
253
254 static int
255 rts_disconnect(struct socket *so)
256 {
257 int s, error;
258 s = splnet();
259 error = raw_usrreqs.pru_disconnect(so);
260 splx(s);
261 return error;
262 }
263
264 /* pru_listen is EOPNOTSUPP */
265
266 static int
267 rts_peeraddr(struct socket *so, struct sockaddr **nam)
268 {
269 int s, error;
270 s = splnet();
271 error = raw_usrreqs.pru_peeraddr(so, nam);
272 splx(s);
273 return error;
274 }
275
276 /* pru_rcvd is EOPNOTSUPP */
277 /* pru_rcvoob is EOPNOTSUPP */
278
279 static int
280 rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
281 struct mbuf *control, struct thread *td)
282 {
283 int s, error;
284 s = splnet();
285 error = raw_usrreqs.pru_send(so, flags, m, nam, control, td);
286 splx(s);
287 return error;
288 }
289
290 /* pru_sense is null */
291
292 static int
293 rts_shutdown(struct socket *so)
294 {
295 int s, error;
296 s = splnet();
297 error = raw_usrreqs.pru_shutdown(so);
298 splx(s);
299 return error;
300 }
301
302 static int
303 rts_sockaddr(struct socket *so, struct sockaddr **nam)
304 {
305 int s, error;
306 s = splnet();
307 error = raw_usrreqs.pru_sockaddr(so, nam);
308 splx(s);
309 return error;
310 }
311
312 static struct pr_usrreqs route_usrreqs = {
313 rts_abort, pru_accept_notsupp, rts_attach, rts_bind, rts_connect,
314 pru_connect2_notsupp, pru_control_notsupp, rts_detach, rts_disconnect,
315 pru_listen_notsupp, rts_peeraddr, pru_rcvd_notsupp, pru_rcvoob_notsupp,
316 rts_send, pru_sense_null, rts_shutdown, rts_sockaddr,
317 sosend, soreceive, sopoll, pru_sosetlabel_null
318 };
319
320 /*ARGSUSED*/
321 static int
322 route_output(struct mbuf *m, struct socket *so)
323 {
324 #define sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0)
325 struct rt_msghdr *rtm = NULL;
326 struct rtentry *rt = NULL;
327 struct radix_node_head *rnh;
328 struct rt_addrinfo info;
329 int len, error = 0;
330 struct ifnet *ifp = NULL;
331 struct ifaddr *ifa = NULL;
332 struct sockaddr_in jail;
333
334 #define senderr(e) { error = e; goto flush;}
335 if (m == NULL || ((m->m_len < sizeof(long)) &&
336 (m = m_pullup(m, sizeof(long))) == NULL))
337 return (ENOBUFS);
338 if ((m->m_flags & M_PKTHDR) == 0)
339 panic("route_output");
340 len = m->m_pkthdr.len;
341 if (len < sizeof(*rtm) ||
342 len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
343 info.rti_info[RTAX_DST] = NULL;
344 senderr(EINVAL);
345 }
346 R_Malloc(rtm, struct rt_msghdr *, len);
347 if (rtm == NULL) {
348 info.rti_info[RTAX_DST] = NULL;
349 senderr(ENOBUFS);
350 }
351 m_copydata(m, 0, len, (caddr_t)rtm);
352 if (rtm->rtm_version != RTM_VERSION) {
353 info.rti_info[RTAX_DST] = NULL;
354 senderr(EPROTONOSUPPORT);
355 }
356 rtm->rtm_pid = curproc->p_pid;
357 bzero(&info, sizeof(info));
358 info.rti_addrs = rtm->rtm_addrs;
359 if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) {
360 info.rti_info[RTAX_DST] = NULL;
361 senderr(EINVAL);
362 }
363 info.rti_flags = rtm->rtm_flags;
364 if (info.rti_info[RTAX_DST] == NULL ||
365 info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
366 (info.rti_info[RTAX_GATEWAY] != NULL &&
367 info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX))
368 senderr(EINVAL);
369 if (info.rti_info[RTAX_GENMASK]) {
370 struct radix_node *t;
371 t = rn_addmask((caddr_t) info.rti_info[RTAX_GENMASK], 0, 1);
372 if (t != NULL &&
373 bcmp((char *)(void *)info.rti_info[RTAX_GENMASK] + 1,
374 (char *)(void *)t->rn_key + 1,
375 ((struct sockaddr *)t->rn_key)->sa_len - 1) == 0)
376 info.rti_info[RTAX_GENMASK] =
377 (struct sockaddr *)t->rn_key;
378 else
379 senderr(ENOBUFS);
380 }
381
382 /*
383 * Verify that the caller has the appropriate privilege; RTM_GET
384 * is the only operation the non-superuser is allowed.
385 */
386 if (rtm->rtm_type != RTM_GET && (error = suser(curthread)) != 0)
387 senderr(error);
388
389 switch (rtm->rtm_type) {
390 struct rtentry *saved_nrt;
391
392 case RTM_ADD:
393 if (info.rti_info[RTAX_GATEWAY] == NULL)
394 senderr(EINVAL);
395 saved_nrt = NULL;
396 error = rtrequest1(RTM_ADD, &info, &saved_nrt);
397 if (error == 0 && saved_nrt) {
398 RT_LOCK(saved_nrt);
399 rt_setmetrics(rtm->rtm_inits,
400 &rtm->rtm_rmx, &saved_nrt->rt_rmx);
401 RT_REMREF(saved_nrt);
402 saved_nrt->rt_genmask = info.rti_info[RTAX_GENMASK];
403 RT_UNLOCK(saved_nrt);
404 }
405 break;
406
407 case RTM_DELETE:
408 saved_nrt = NULL;
409 error = rtrequest1(RTM_DELETE, &info, &saved_nrt);
410 if (error == 0) {
411 RT_LOCK(saved_nrt);
412 rt = saved_nrt;
413 goto report;
414 }
415 break;
416
417 case RTM_GET:
418 case RTM_CHANGE:
419 case RTM_LOCK:
420 rnh = rt_tables[info.rti_info[RTAX_DST]->sa_family];
421 if (rnh == NULL)
422 senderr(EAFNOSUPPORT);
423 RADIX_NODE_HEAD_LOCK(rnh);
424 rt = (struct rtentry *) rnh->rnh_lookup(info.rti_info[RTAX_DST],
425 info.rti_info[RTAX_NETMASK], rnh);
426 RADIX_NODE_HEAD_UNLOCK(rnh);
427 if (rt == NULL) /* XXX looks bogus */
428 senderr(ESRCH);
429 RT_LOCK(rt);
430 RT_ADDREF(rt);
431
432 switch(rtm->rtm_type) {
433
434 case RTM_GET:
435 report:
436 RT_LOCK_ASSERT(rt);
437 info.rti_info[RTAX_DST] = rt_key(rt);
438 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
439 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
440 info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
441 if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
442 ifp = rt->rt_ifp;
443 if (ifp) {
444 info.rti_info[RTAX_IFP] =
445 ifaddr_byindex(ifp->if_index)->ifa_addr;
446 if (jailed(so->so_cred)) {
447 bzero(&jail, sizeof(jail));
448 jail.sin_family = PF_INET;
449 jail.sin_len = sizeof(jail);
450 jail.sin_addr.s_addr =
451 htonl(prison_getip(so->so_cred));
452 info.rti_info[RTAX_IFA] =
453 (struct sockaddr *)&jail;
454 } else
455 info.rti_info[RTAX_IFA] =
456 rt->rt_ifa->ifa_addr;
457 if (ifp->if_flags & IFF_POINTOPOINT)
458 info.rti_info[RTAX_BRD] =
459 rt->rt_ifa->ifa_dstaddr;
460 rtm->rtm_index = ifp->if_index;
461 } else {
462 info.rti_info[RTAX_IFP] = NULL;
463 info.rti_info[RTAX_IFA] = NULL;
464 }
465 }
466 len = rt_msg2(rtm->rtm_type, &info, NULL, NULL);
467 if (len > rtm->rtm_msglen) {
468 struct rt_msghdr *new_rtm;
469 R_Malloc(new_rtm, struct rt_msghdr *, len);
470 if (new_rtm == NULL) {
471 RT_UNLOCK(rt);
472 senderr(ENOBUFS);
473 }
474 bcopy(rtm, new_rtm, rtm->rtm_msglen);
475 Free(rtm); rtm = new_rtm;
476 }
477 (void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, NULL);
478 rtm->rtm_flags = rt->rt_flags;
479 rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
480 rtm->rtm_addrs = info.rti_addrs;
481 break;
482
483 case RTM_CHANGE:
484 /*
485 * New gateway could require new ifaddr, ifp;
486 * flags may also be different; ifp may be specified
487 * by ll sockaddr when protocol address is ambiguous
488 */
489 if (((rt->rt_flags & RTF_GATEWAY) &&
490 info.rti_info[RTAX_GATEWAY] != NULL) ||
491 info.rti_info[RTAX_IFP] != NULL ||
492 (info.rti_info[RTAX_IFA] != NULL &&
493 !sa_equal(info.rti_info[RTAX_IFA],
494 rt->rt_ifa->ifa_addr))) {
495 if ((error = rt_getifa(&info)) != 0) {
496 RT_UNLOCK(rt);
497 senderr(error);
498 }
499 }
500 if (info.rti_info[RTAX_GATEWAY] != NULL &&
501 (error = rt_setgate(rt, rt_key(rt),
502 info.rti_info[RTAX_GATEWAY])) != 0) {
503 RT_UNLOCK(rt);
504 senderr(error);
505 }
506 if ((ifa = info.rti_ifa) != NULL) {
507 struct ifaddr *oifa = rt->rt_ifa;
508 if (oifa != ifa) {
509 if (oifa) {
510 if (oifa->ifa_rtrequest)
511 oifa->ifa_rtrequest(
512 RTM_DELETE, rt,
513 &info);
514 IFAFREE(oifa);
515 }
516 IFAREF(ifa);
517 rt->rt_ifa = ifa;
518 rt->rt_ifp = info.rti_ifp;
519 }
520 }
521 rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
522 &rt->rt_rmx);
523 if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
524 rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info);
525 if (info.rti_info[RTAX_GENMASK])
526 rt->rt_genmask = info.rti_info[RTAX_GENMASK];
527 /* FALLTHROUGH */
528 case RTM_LOCK:
529 /* We don't support locks anymore */
530 break;
531 }
532 RT_UNLOCK(rt);
533 break;
534
535 default:
536 senderr(EOPNOTSUPP);
537 }
538
539 flush:
540 if (rtm) {
541 if (error)
542 rtm->rtm_errno = error;
543 else
544 rtm->rtm_flags |= RTF_DONE;
545 }
546 if (rt) /* XXX can this be true? */
547 RTFREE(rt);
548 {
549 struct rawcb *rp = NULL;
550 /*
551 * Check to see if we don't want our own messages.
552 */
553 if ((so->so_options & SO_USELOOPBACK) == 0) {
554 if (route_cb.any_count <= 1) {
555 if (rtm)
556 Free(rtm);
557 m_freem(m);
558 return (error);
559 }
560 /* There is another listener, so construct message */
561 rp = sotorawcb(so);
562 }
563 if (rtm) {
564 m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
565 if (m->m_pkthdr.len < rtm->rtm_msglen) {
566 m_freem(m);
567 m = NULL;
568 } else if (m->m_pkthdr.len > rtm->rtm_msglen)
569 m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
570 Free(rtm);
571 }
572 if (m) {
573 if (rp) {
574 /*
575 * XXX insure we don't get a copy by
576 * invalidating our protocol
577 */
578 unsigned short family = rp->rcb_proto.sp_family;
579 rp->rcb_proto.sp_family = 0;
580 rt_dispatch(m, info.rti_info[RTAX_DST]);
581 rp->rcb_proto.sp_family = family;
582 } else
583 rt_dispatch(m, info.rti_info[RTAX_DST]);
584 }
585 }
586 return (error);
587 #undef sa_equal
588 }
589
590 static void
591 rt_setmetrics(u_long which, const struct rt_metrics *in,
592 struct rt_metrics_lite *out)
593 {
594 #define metric(f, e) if (which & (f)) out->e = in->e;
595 /*
596 * Only these are stored in the routing entry since introduction
597 * of tcp hostcache. The rest is ignored.
598 */
599 metric(RTV_MTU, rmx_mtu);
600 metric(RTV_EXPIRE, rmx_expire);
601 #undef metric
602 }
603
604 static void
605 rt_getmetrics(const struct rt_metrics_lite *in, struct rt_metrics *out)
606 {
607 #define metric(e) out->e = in->e;
608 bzero(out, sizeof(*out));
609 metric(rmx_mtu);
610 metric(rmx_expire);
611 #undef metric
612 }
613
614 /*
615 * Extract the addresses of the passed sockaddrs.
616 * Do a little sanity checking so as to avoid bad memory references.
617 * This data is derived straight from userland.
618 */
619 static int
620 rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
621 {
622 struct sockaddr *sa;
623 int i;
624
625 for (i = 0; i < RTAX_MAX && cp < cplim; i++) {
626 if ((rtinfo->rti_addrs & (1 << i)) == 0)
627 continue;
628 sa = (struct sockaddr *)cp;
629 /*
630 * It won't fit.
631 */
632 if (cp + sa->sa_len > cplim)
633 return (EINVAL);
634 /*
635 * there are no more.. quit now
636 * If there are more bits, they are in error.
637 * I've seen this. route(1) can evidently generate these.
638 * This causes kernel to core dump.
639 * for compatibility, If we see this, point to a safe address.
640 */
641 if (sa->sa_len == 0) {
642 rtinfo->rti_info[i] = &sa_zero;
643 return (0); /* should be EINVAL but for compat */
644 }
645 /* accept it */
646 rtinfo->rti_info[i] = sa;
647 cp += SA_SIZE(sa);
648 }
649 return (0);
650 }
651
652 static struct mbuf *
653 rt_msg1(int type, struct rt_addrinfo *rtinfo)
654 {
655 struct rt_msghdr *rtm;
656 struct mbuf *m;
657 int i;
658 struct sockaddr *sa;
659 int len, dlen;
660
661 switch (type) {
662
663 case RTM_DELADDR:
664 case RTM_NEWADDR:
665 len = sizeof(struct ifa_msghdr);
666 break;
667
668 case RTM_DELMADDR:
669 case RTM_NEWMADDR:
670 len = sizeof(struct ifma_msghdr);
671 break;
672
673 case RTM_IFINFO:
674 len = sizeof(struct if_msghdr);
675 break;
676
677 case RTM_IFANNOUNCE:
678 len = sizeof(struct if_announcemsghdr);
679 break;
680
681 default:
682 len = sizeof(struct rt_msghdr);
683 }
684 if (len > MCLBYTES)
685 panic("rt_msg1");
686 m = m_gethdr(M_DONTWAIT, MT_DATA);
687 if (m && len > MHLEN) {
688 MCLGET(m, M_DONTWAIT);
689 if ((m->m_flags & M_EXT) == 0) {
690 m_free(m);
691 m = NULL;
692 }
693 }
694 if (m == NULL)
695 return (m);
696 m->m_pkthdr.len = m->m_len = len;
697 m->m_pkthdr.rcvif = NULL;
698 rtm = mtod(m, struct rt_msghdr *);
699 bzero((caddr_t)rtm, len);
700 for (i = 0; i < RTAX_MAX; i++) {
701 if ((sa = rtinfo->rti_info[i]) == NULL)
702 continue;
703 rtinfo->rti_addrs |= (1 << i);
704 dlen = SA_SIZE(sa);
705 m_copyback(m, len, dlen, (caddr_t)sa);
706 len += dlen;
707 }
708 if (m->m_pkthdr.len != len) {
709 m_freem(m);
710 return (NULL);
711 }
712 rtm->rtm_msglen = len;
713 rtm->rtm_version = RTM_VERSION;
714 rtm->rtm_type = type;
715 return (m);
716 }
717
718 static int
719 rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w)
720 {
721 int i;
722 int len, dlen, second_time = 0;
723 caddr_t cp0;
724
725 rtinfo->rti_addrs = 0;
726 again:
727 switch (type) {
728
729 case RTM_DELADDR:
730 case RTM_NEWADDR:
731 len = sizeof(struct ifa_msghdr);
732 break;
733
734 case RTM_IFINFO:
735 len = sizeof(struct if_msghdr);
736 break;
737
738 case RTM_NEWMADDR:
739 len = sizeof(struct ifma_msghdr);
740 break;
741
742 default:
743 len = sizeof(struct rt_msghdr);
744 }
745 cp0 = cp;
746 if (cp0)
747 cp += len;
748 for (i = 0; i < RTAX_MAX; i++) {
749 struct sockaddr *sa;
750
751 if ((sa = rtinfo->rti_info[i]) == NULL)
752 continue;
753 rtinfo->rti_addrs |= (1 << i);
754 dlen = SA_SIZE(sa);
755 if (cp) {
756 bcopy((caddr_t)sa, cp, (unsigned)dlen);
757 cp += dlen;
758 }
759 len += dlen;
760 }
761 len = ALIGN(len);
762 if (cp == NULL && w != NULL && !second_time) {
763 struct walkarg *rw = w;
764
765 if (rw->w_req) {
766 if (rw->w_tmemsize < len) {
767 if (rw->w_tmem)
768 free(rw->w_tmem, M_RTABLE);
769 rw->w_tmem = (caddr_t)
770 malloc(len, M_RTABLE, M_NOWAIT);
771 if (rw->w_tmem)
772 rw->w_tmemsize = len;
773 }
774 if (rw->w_tmem) {
775 cp = rw->w_tmem;
776 second_time = 1;
777 goto again;
778 }
779 }
780 }
781 if (cp) {
782 struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
783
784 rtm->rtm_version = RTM_VERSION;
785 rtm->rtm_type = type;
786 rtm->rtm_msglen = len;
787 }
788 return (len);
789 }
790
791 /*
792 * This routine is called to generate a message from the routing
793 * socket indicating that a redirect has occured, a routing lookup
794 * has failed, or that a protocol has detected timeouts to a particular
795 * destination.
796 */
797 void
798 rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
799 {
800 struct rt_msghdr *rtm;
801 struct mbuf *m;
802 struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
803
804 if (route_cb.any_count == 0)
805 return;
806 m = rt_msg1(type, rtinfo);
807 if (m == NULL)
808 return;
809 rtm = mtod(m, struct rt_msghdr *);
810 rtm->rtm_flags = RTF_DONE | flags;
811 rtm->rtm_errno = error;
812 rtm->rtm_addrs = rtinfo->rti_addrs;
813 rt_dispatch(m, sa);
814 }
815
816 /*
817 * This routine is called to generate a message from the routing
818 * socket indicating that the status of a network interface has changed.
819 */
820 void
821 rt_ifmsg(struct ifnet *ifp)
822 {
823 struct if_msghdr *ifm;
824 struct mbuf *m;
825 struct rt_addrinfo info;
826
827 if (route_cb.any_count == 0)
828 return;
829 bzero((caddr_t)&info, sizeof(info));
830 m = rt_msg1(RTM_IFINFO, &info);
831 if (m == NULL)
832 return;
833 ifm = mtod(m, struct if_msghdr *);
834 ifm->ifm_index = ifp->if_index;
835 ifm->ifm_flags = ifp->if_flags;
836 ifm->ifm_data = ifp->if_data;
837 ifm->ifm_addrs = 0;
838 rt_dispatch(m, NULL);
839 }
840
841 /*
842 * This is called to generate messages from the routing socket
843 * indicating a network interface has had addresses associated with it.
844 * if we ever reverse the logic and replace messages TO the routing
845 * socket indicate a request to configure interfaces, then it will
846 * be unnecessary as the routing socket will automatically generate
847 * copies of it.
848 */
849 void
850 rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
851 {
852 struct rt_addrinfo info;
853 struct sockaddr *sa = NULL;
854 int pass;
855 struct mbuf *m = NULL;
856 struct ifnet *ifp = ifa->ifa_ifp;
857
858 if (route_cb.any_count == 0)
859 return;
860 for (pass = 1; pass < 3; pass++) {
861 bzero((caddr_t)&info, sizeof(info));
862 if ((cmd == RTM_ADD && pass == 1) ||
863 (cmd == RTM_DELETE && pass == 2)) {
864 struct ifa_msghdr *ifam;
865 int ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
866
867 info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
868 info.rti_info[RTAX_IFP] =
869 ifaddr_byindex(ifp->if_index)->ifa_addr;
870 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
871 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
872 if ((m = rt_msg1(ncmd, &info)) == NULL)
873 continue;
874 ifam = mtod(m, struct ifa_msghdr *);
875 ifam->ifam_index = ifp->if_index;
876 ifam->ifam_metric = ifa->ifa_metric;
877 ifam->ifam_flags = ifa->ifa_flags;
878 ifam->ifam_addrs = info.rti_addrs;
879 }
880 if ((cmd == RTM_ADD && pass == 2) ||
881 (cmd == RTM_DELETE && pass == 1)) {
882 struct rt_msghdr *rtm;
883
884 if (rt == NULL)
885 continue;
886 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
887 info.rti_info[RTAX_DST] = sa = rt_key(rt);
888 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
889 if ((m = rt_msg1(cmd, &info)) == NULL)
890 continue;
891 rtm = mtod(m, struct rt_msghdr *);
892 rtm->rtm_index = ifp->if_index;
893 rtm->rtm_flags |= rt->rt_flags;
894 rtm->rtm_errno = error;
895 rtm->rtm_addrs = info.rti_addrs;
896 }
897 rt_dispatch(m, sa);
898 }
899 }
900
901 /*
902 * This is the analogue to the rt_newaddrmsg which performs the same
903 * function but for multicast group memberhips. This is easier since
904 * there is no route state to worry about.
905 */
906 void
907 rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma)
908 {
909 struct rt_addrinfo info;
910 struct mbuf *m = NULL;
911 struct ifnet *ifp = ifma->ifma_ifp;
912 struct ifma_msghdr *ifmam;
913
914 if (route_cb.any_count == 0)
915 return;
916
917 bzero((caddr_t)&info, sizeof(info));
918 info.rti_info[RTAX_IFA] = ifma->ifma_addr;
919 info.rti_info[RTAX_IFP] =
920 ifp ? ifaddr_byindex(ifp->if_index)->ifa_addr : NULL;
921 /*
922 * If a link-layer address is present, present it as a ``gateway''
923 * (similarly to how ARP entries, e.g., are presented).
924 */
925 info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr;
926 m = rt_msg1(cmd, &info);
927 if (m == NULL)
928 return;
929 ifmam = mtod(m, struct ifma_msghdr *);
930 ifmam->ifmam_index = ifp->if_index;
931 ifmam->ifmam_addrs = info.rti_addrs;
932 rt_dispatch(m, ifma->ifma_addr);
933 }
934
935 /*
936 * This is called to generate routing socket messages indicating
937 * network interface arrival and departure.
938 */
939 void
940 rt_ifannouncemsg(struct ifnet *ifp, int what)
941 {
942 struct if_announcemsghdr *ifan;
943 struct mbuf *m;
944 struct rt_addrinfo info;
945
946 if (route_cb.any_count == 0)
947 return;
948 bzero((caddr_t)&info, sizeof(info));
949 m = rt_msg1(RTM_IFANNOUNCE, &info);
950 if (m == NULL)
951 return;
952 ifan = mtod(m, struct if_announcemsghdr *);
953 ifan->ifan_index = ifp->if_index;
954 strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name));
955 ifan->ifan_what = what;
956 rt_dispatch(m, NULL);
957 }
958
959 static void
960 rt_dispatch(struct mbuf *m, const struct sockaddr *sa)
961 {
962 unsigned short *family;
963 struct m_tag *tag;
964
965 /*
966 * Preserve the family from the sockaddr, if any, in an m_tag for
967 * use when injecting the mbuf into the routing socket buffer from
968 * the netisr.
969 */
970 if (sa != NULL) {
971 tag = m_tag_get(PACKET_TAG_RTSOCKFAM, sizeof(unsigned short),
972 M_NOWAIT);
973 if (tag == NULL) {
974 m_freem(m);
975 return;
976 }
977 family = (unsigned short *)(tag + 1);
978 *family = sa ? sa->sa_family : 0;
979 m_tag_prepend(m, tag);
980 }
981 netisr_queue(NETISR_ROUTE, m); /* mbuf is free'd on failure. */
982 }
983
984 /*
985 * This is used in dumping the kernel table via sysctl().
986 */
987 static int
988 sysctl_dumpentry(struct radix_node *rn, void *vw)
989 {
990 struct walkarg *w = vw;
991 struct rtentry *rt = (struct rtentry *)rn;
992 int error = 0, size;
993 struct rt_addrinfo info;
994
995 if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
996 return 0;
997 bzero((caddr_t)&info, sizeof(info));
998 info.rti_info[RTAX_DST] = rt_key(rt);
999 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1000 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1001 info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
1002 if (rt->rt_ifp) {
1003 info.rti_info[RTAX_IFP] =
1004 ifaddr_byindex(rt->rt_ifp->if_index)->ifa_addr;
1005 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1006 if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
1007 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
1008 }
1009 size = rt_msg2(RTM_GET, &info, NULL, w);
1010 if (w->w_req && w->w_tmem) {
1011 struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
1012
1013 rtm->rtm_flags = rt->rt_flags;
1014 rtm->rtm_use = rt->rt_rmx.rmx_pksent;
1015 rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
1016 rtm->rtm_index = rt->rt_ifp->if_index;
1017 rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
1018 rtm->rtm_addrs = info.rti_addrs;
1019 error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
1020 return (error);
1021 }
1022 return (error);
1023 }
1024
1025 static int
1026 sysctl_iflist(int af, struct walkarg *w)
1027 {
1028 struct ifnet *ifp;
1029 struct ifaddr *ifa;
1030 struct rt_addrinfo info;
1031 int len, error = 0;
1032
1033 bzero((caddr_t)&info, sizeof(info));
1034 /* IFNET_RLOCK(); */ /* could sleep XXX */
1035 TAILQ_FOREACH(ifp, &ifnet, if_link) {
1036 if (w->w_arg && w->w_arg != ifp->if_index)
1037 continue;
1038 ifa = ifaddr_byindex(ifp->if_index);
1039 info.rti_info[RTAX_IFP] = ifa->ifa_addr;
1040 len = rt_msg2(RTM_IFINFO, &info, NULL, w);
1041 info.rti_info[RTAX_IFP] = NULL;
1042 if (w->w_req && w->w_tmem) {
1043 struct if_msghdr *ifm;
1044
1045 ifm = (struct if_msghdr *)w->w_tmem;
1046 ifm->ifm_index = ifp->if_index;
1047 ifm->ifm_flags = ifp->if_flags;
1048 ifm->ifm_data = ifp->if_data;
1049 ifm->ifm_addrs = info.rti_addrs;
1050 error = SYSCTL_OUT(w->w_req,(caddr_t)ifm, len);
1051 if (error)
1052 goto done;
1053 }
1054 while ((ifa = TAILQ_NEXT(ifa, ifa_link)) != NULL) {
1055 if (af && af != ifa->ifa_addr->sa_family)
1056 continue;
1057 if (jailed(curthread->td_ucred) &&
1058 prison_if(curthread->td_ucred, ifa->ifa_addr))
1059 continue;
1060 info.rti_info[RTAX_IFA] = ifa->ifa_addr;
1061 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1062 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1063 len = rt_msg2(RTM_NEWADDR, &info, NULL, w);
1064 if (w->w_req && w->w_tmem) {
1065 struct ifa_msghdr *ifam;
1066
1067 ifam = (struct ifa_msghdr *)w->w_tmem;
1068 ifam->ifam_index = ifa->ifa_ifp->if_index;
1069 ifam->ifam_flags = ifa->ifa_flags;
1070 ifam->ifam_metric = ifa->ifa_metric;
1071 ifam->ifam_addrs = info.rti_addrs;
1072 error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
1073 if (error)
1074 goto done;
1075 }
1076 }
1077 info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
1078 info.rti_info[RTAX_BRD] = NULL;
1079 }
1080 done:
1081 /* IFNET_RUNLOCK(); */ /* XXX */
1082 return (error);
1083 }
1084
1085 int
1086 sysctl_ifmalist(int af, struct walkarg *w)
1087 {
1088 struct ifnet *ifp;
1089 struct ifmultiaddr *ifma;
1090 struct rt_addrinfo info;
1091 int len, error = 0;
1092 struct ifaddr *ifa;
1093
1094 bzero((caddr_t)&info, sizeof(info));
1095 /* IFNET_RLOCK(); */ /* could sleep XXX */
1096 TAILQ_FOREACH(ifp, &ifnet, if_link) {
1097 if (w->w_arg && w->w_arg != ifp->if_index)
1098 continue;
1099 ifa = ifaddr_byindex(ifp->if_index);
1100 info.rti_info[RTAX_IFP] = ifa ? ifa->ifa_addr : NULL;
1101 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1102 if (af && af != ifma->ifma_addr->sa_family)
1103 continue;
1104 if (jailed(curproc->p_ucred) &&
1105 prison_if(curproc->p_ucred, ifma->ifma_addr))
1106 continue;
1107 info.rti_info[RTAX_IFA] = ifma->ifma_addr;
1108 info.rti_info[RTAX_GATEWAY] =
1109 (ifma->ifma_addr->sa_family != AF_LINK) ?
1110 ifma->ifma_lladdr : NULL;
1111 len = rt_msg2(RTM_NEWMADDR, &info, NULL, w);
1112 if (w->w_req && w->w_tmem) {
1113 struct ifma_msghdr *ifmam;
1114
1115 ifmam = (struct ifma_msghdr *)w->w_tmem;
1116 ifmam->ifmam_index = ifma->ifma_ifp->if_index;
1117 ifmam->ifmam_flags = 0;
1118 ifmam->ifmam_addrs = info.rti_addrs;
1119 error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
1120 if (error)
1121 goto done;
1122 }
1123 }
1124 }
1125 done:
1126 /* IFNET_RUNLOCK(); */ /* XXX */
1127 return (error);
1128 }
1129
1130 static int
1131 sysctl_rtsock(SYSCTL_HANDLER_ARGS)
1132 {
1133 int *name = (int *)arg1;
1134 u_int namelen = arg2;
1135 struct radix_node_head *rnh;
1136 int i, lim, s, error = EINVAL;
1137 u_char af;
1138 struct walkarg w;
1139
1140 name ++;
1141 namelen--;
1142 if (req->newptr)
1143 return (EPERM);
1144 if (namelen != 3)
1145 return ((namelen < 3) ? EISDIR : ENOTDIR);
1146 af = name[0];
1147 if (af > AF_MAX)
1148 return (EINVAL);
1149 bzero(&w, sizeof(w));
1150 w.w_op = name[1];
1151 w.w_arg = name[2];
1152 w.w_req = req;
1153
1154 s = splnet();
1155 switch (w.w_op) {
1156
1157 case NET_RT_DUMP:
1158 case NET_RT_FLAGS:
1159 if (af == 0) { /* dump all tables */
1160 i = 1;
1161 lim = AF_MAX;
1162 } else /* dump only one table */
1163 i = lim = af;
1164 for (error = 0; error == 0 && i <= lim; i++)
1165 if ((rnh = rt_tables[i]) != NULL) {
1166 /* RADIX_NODE_HEAD_LOCK(rnh); */
1167 error = rnh->rnh_walktree(rnh,
1168 sysctl_dumpentry, &w);/* could sleep XXX */
1169 /* RADIX_NODE_HEAD_UNLOCK(rnh); */
1170 } else if (af != 0)
1171 error = EAFNOSUPPORT;
1172 break;
1173
1174 case NET_RT_IFLIST:
1175 error = sysctl_iflist(af, &w);
1176 break;
1177
1178 case NET_RT_IFMALIST:
1179 error = sysctl_ifmalist(af, &w);
1180 break;
1181 }
1182 splx(s);
1183 if (w.w_tmem)
1184 free(w.w_tmem, M_RTABLE);
1185 return (error);
1186 }
1187
1188 SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, "");
1189
1190 /*
1191 * Definitions of protocols supported in the ROUTE domain.
1192 */
1193
1194 extern struct domain routedomain; /* or at least forward */
1195
1196 static struct protosw routesw[] = {
1197 { SOCK_RAW, &routedomain, 0, PR_ATOMIC|PR_ADDR,
1198 0, route_output, raw_ctlinput, 0,
1199 0,
1200 raw_init, 0, 0, 0,
1201 &route_usrreqs
1202 }
1203 };
1204
1205 static struct domain routedomain =
1206 { PF_ROUTE, "route", 0, 0, 0,
1207 routesw, &routesw[sizeof(routesw)/sizeof(routesw[0])] };
1208
1209 DOMAIN_SET(route);
Cache object: e19939570abf79fee054b47a1f9ba769
|