1 /*
2 * IP multicast forwarding procedures
3 *
4 * Written by David Waitzman, BBN Labs, August 1988.
5 * Modified by Steve Deering, Stanford, February 1989.
6 * Modified by Mark J. Steiglitz, Stanford, May, 1991
7 * Modified by Van Jacobson, LBL, January 1993
8 * Modified by Ajit Thyagarajan, PARC, August 1993
9 * Modified by Bill Fenner, PARC, April 1995
10 *
11 * MROUTING Revision: 3.5
12 * $FreeBSD: releng/5.0/sys/netinet/ip_mroute.c 106968 2002-11-15 22:53:53Z luigi $
13 */
14
15 #include "opt_mac.h"
16 #include "opt_mrouting.h"
17 #include "opt_random_ip_id.h"
18
19 #include <sys/param.h>
20 #include <sys/kernel.h>
21 #include <sys/lock.h>
22 #include <sys/mac.h>
23 #include <sys/malloc.h>
24 #include <sys/mbuf.h>
25 #include <sys/protosw.h>
26 #include <sys/signalvar.h>
27 #include <sys/socket.h>
28 #include <sys/socketvar.h>
29 #include <sys/sockio.h>
30 #include <sys/sx.h>
31 #include <sys/sysctl.h>
32 #include <sys/syslog.h>
33 #include <sys/systm.h>
34 #include <sys/time.h>
35 #include <net/if.h>
36 #include <net/route.h>
37 #include <netinet/in.h>
38 #include <netinet/igmp.h>
39 #include <netinet/in_systm.h>
40 #include <netinet/in_var.h>
41 #include <netinet/ip.h>
42 #include <netinet/ip_encap.h>
43 #include <netinet/ip_mroute.h>
44 #include <netinet/ip_var.h>
45 #include <netinet/udp.h>
46 #include <machine/in_cksum.h>
47
48 /*
49 * Control debugging code for rsvp and multicast routing code.
50 * Can only set them with the debugger.
51 */
52 static u_int rsvpdebug; /* non-zero enables debugging */
53
54 static u_int mrtdebug; /* any set of the flags below */
55 #define DEBUG_MFC 0x02
56 #define DEBUG_FORWARD 0x04
57 #define DEBUG_EXPIRE 0x08
58 #define DEBUG_XMIT 0x10
59
60 #define M_HASCL(m) ((m)->m_flags & M_EXT)
61
62 static MALLOC_DEFINE(M_MRTABLE, "mroutetbl", "multicast routing tables");
63
64 static struct mrtstat mrtstat;
65 SYSCTL_STRUCT(_net_inet_ip, OID_AUTO, mrtstat, CTLFLAG_RW,
66 &mrtstat, mrtstat,
67 "Multicast Routing Statistics (struct mrtstat, netinet/ip_mroute.h)");
68
69 static struct mfc *mfctable[MFCTBLSIZ];
70 static u_char nexpire[MFCTBLSIZ];
71 static struct vif viftable[MAXVIFS];
72
73 static struct callout_handle expire_upcalls_ch;
74
75 #define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */
76 #define UPCALL_EXPIRE 6 /* number of timeouts */
77
78 /*
79 * Define the token bucket filter structures
80 * tbftable -> each vif has one of these for storing info
81 */
82
83 static struct tbf tbftable[MAXVIFS];
84 #define TBF_REPROCESS (hz / 100) /* 100x / second */
85
86 /*
87 * 'Interfaces' associated with decapsulator (so we can tell
88 * packets that went through it from ones that get reflected
89 * by a broken gateway). These interfaces are never linked into
90 * the system ifnet list & no routes point to them. I.e., packets
91 * can't be sent this way. They only exist as a placeholder for
92 * multicast source verification.
93 */
94 static struct ifnet multicast_decap_if[MAXVIFS];
95
96 #define ENCAP_TTL 64
97 #define ENCAP_PROTO IPPROTO_IPIP /* 4 */
98
99 /* prototype IP hdr for encapsulated packets */
100 static struct ip multicast_encap_iphdr = {
101 #if BYTE_ORDER == LITTLE_ENDIAN
102 sizeof(struct ip) >> 2, IPVERSION,
103 #else
104 IPVERSION, sizeof(struct ip) >> 2,
105 #endif
106 0, /* tos */
107 sizeof(struct ip), /* total length */
108 0, /* id */
109 0, /* frag offset */
110 ENCAP_TTL, ENCAP_PROTO,
111 0, /* checksum */
112 };
113
114 /*
115 * Private variables.
116 */
117 static vifi_t numvifs;
118 static const struct encaptab *encap_cookie;
119
120 /*
121 * one-back cache used by mroute_encapcheck to locate a tunnel's vif
122 * given a datagram's src ip address.
123 */
124 static u_long last_encap_src;
125 static struct vif *last_encap_vif;
126
127 static u_long X_ip_mcast_src(int vifi);
128 static int X_ip_mforward(struct ip *ip, struct ifnet *ifp,
129 struct mbuf *m, struct ip_moptions *imo);
130 static int X_ip_mrouter_done(void);
131 static int X_ip_mrouter_get(struct socket *so, struct sockopt *m);
132 static int X_ip_mrouter_set(struct socket *so, struct sockopt *m);
133 static int X_legal_vif_num(int vif);
134 static int X_mrt_ioctl(int cmd, caddr_t data);
135
136 static int get_sg_cnt(struct sioc_sg_req *);
137 static int get_vif_cnt(struct sioc_vif_req *);
138 static int ip_mrouter_init(struct socket *, int);
139 static int add_vif(struct vifctl *);
140 static int del_vif(vifi_t);
141 static int add_mfc(struct mfcctl *);
142 static int del_mfc(struct mfcctl *);
143 static int socket_send(struct socket *, struct mbuf *, struct sockaddr_in *);
144 static int set_assert(int);
145 static void expire_upcalls(void *);
146 static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *, vifi_t);
147 static void phyint_send(struct ip *, struct vif *, struct mbuf *);
148 static void encap_send(struct ip *, struct vif *, struct mbuf *);
149 static void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long);
150 static void tbf_queue(struct vif *, struct mbuf *);
151 static void tbf_process_q(struct vif *);
152 static void tbf_reprocess_q(void *);
153 static int tbf_dq_sel(struct vif *, struct ip *);
154 static void tbf_send_packet(struct vif *, struct mbuf *);
155 static void tbf_update_tokens(struct vif *);
156 static int priority(struct vif *, struct ip *);
157
/*
 * Non-zero when special PIM assert processing is enabled
 * (set through set_assert(), cleared on router init and shutdown).
 */
static int pim_assert;

/*
 * Rate limit for assert notification messages, in usec.
 */
#define ASSERT_MSG_TIME     3000000

/*
 * Hash a (source, group) address pair into an mfctable bucket index:
 * both addresses are folded with shifts and XORs, and MFCHASHMOD()
 * reduces the result.
 */
#define MFCHASH(a, g)                                           \
    MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^                \
               ((g) >> 20) ^ ((g) >> 10) ^ (g))
172
173 /*
174 * Find a route for a given origin IP address and Multicast group address
175 * Type of service parameter to be added in the future!!!
176 * Statistics are updated by the caller if needed
177 * (mrtstat.mrts_mfc_lookups and mrtstat.mrts_mfc_misses)
178 */
179 static struct mfc *
180 mfc_find(in_addr_t o, in_addr_t g)
181 {
182 struct mfc *rt;
183
184 for (rt = mfctable[MFCHASH(o,g)]; rt; rt = rt->mfc_next)
185 if ((rt->mfc_origin.s_addr == o) &&
186 (rt->mfc_mcastgrp.s_addr == g) && (rt->mfc_stall == NULL))
187 break;
188 return rt;
189 }
190
/*
 * Macros to compute elapsed time efficiently.
 * Borrowed from Van Jacobson's scheduling code.
 *
 * TV_DELTA(a, b, delta) stores a - b, in microseconds, into the lvalue
 * "delta".  "a" and "b" are structures with tv_sec and tv_usec members.
 * Differences of one or two seconds are handled with additions; any
 * other non-zero difference falls back to a multiplication.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as
 * a single statement (it is safe as the unbraced body of an if/else),
 * and "delta" is parenthesized so any lvalue expression may be passed.
 * Every call must be terminated with a semicolon.  "a" and "b" are
 * evaluated more than once.
 */
#define TV_DELTA(a, b, delta) do {                              \
    int xxs;                                                    \
                                                                \
    (delta) = (a).tv_usec - (b).tv_usec;                        \
    if ((xxs = (a).tv_sec - (b).tv_sec) != 0) {                 \
        switch (xxs) {                                          \
        case 2:                                                 \
            (delta) += 1000000;                                 \
            /* FALLTHROUGH */                                   \
        case 1:                                                 \
            (delta) += 1000000;                                 \
            break;                                              \
        default:                                                \
            (delta) += (1000000 * xxs);                         \
        }                                                       \
    }                                                           \
} while (0)
211
/*
 * TV_LT(a, b) is non-zero when time "a" is earlier than time "b":
 * either the seconds are smaller, or the seconds are equal and the
 * microseconds are smaller.  Arguments are evaluated more than once.
 */
#define TV_LT(a, b)                                             \
    ((a).tv_sec < (b).tv_sec ||                                 \
     ((a).tv_sec == (b).tv_sec && (a).tv_usec < (b).tv_usec))
214
215 /*
216 * Handle MRT setsockopt commands to modify the multicast routing tables.
217 */
218 static int
219 X_ip_mrouter_set(struct socket *so, struct sockopt *sopt)
220 {
221 int error, optval;
222 vifi_t vifi;
223 struct vifctl vifc;
224 struct mfcctl mfc;
225
226 if (so != ip_mrouter && sopt->sopt_name != MRT_INIT)
227 return EPERM;
228
229 error = 0;
230 switch (sopt->sopt_name) {
231 case MRT_INIT:
232 error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
233 if (error)
234 break;
235 error = ip_mrouter_init(so, optval);
236 break;
237
238 case MRT_DONE:
239 error = ip_mrouter_done();
240 break;
241
242 case MRT_ADD_VIF:
243 error = sooptcopyin(sopt, &vifc, sizeof vifc, sizeof vifc);
244 if (error)
245 break;
246 error = add_vif(&vifc);
247 break;
248
249 case MRT_DEL_VIF:
250 error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi);
251 if (error)
252 break;
253 error = del_vif(vifi);
254 break;
255
256 case MRT_ADD_MFC:
257 case MRT_DEL_MFC:
258 error = sooptcopyin(sopt, &mfc, sizeof mfc, sizeof mfc);
259 if (error)
260 break;
261 if (sopt->sopt_name == MRT_ADD_MFC)
262 error = add_mfc(&mfc);
263 else
264 error = del_mfc(&mfc);
265 break;
266
267 case MRT_ASSERT:
268 error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
269 if (error)
270 break;
271 set_assert(optval);
272 break;
273
274 default:
275 error = EOPNOTSUPP;
276 break;
277 }
278 return error;
279 }
280
281 /*
282 * Handle MRT getsockopt commands
283 */
284 static int
285 X_ip_mrouter_get(struct socket *so, struct sockopt *sopt)
286 {
287 int error;
288 static int version = 0x0305; /* !!! why is this here? XXX */
289
290 switch (sopt->sopt_name) {
291 case MRT_VERSION:
292 error = sooptcopyout(sopt, &version, sizeof version);
293 break;
294
295 case MRT_ASSERT:
296 error = sooptcopyout(sopt, &pim_assert, sizeof pim_assert);
297 break;
298
299 default:
300 error = EOPNOTSUPP;
301 break;
302 }
303 return error;
304 }
305
306 /*
307 * Handle ioctl commands to obtain information from the cache
308 */
309 static int
310 X_mrt_ioctl(int cmd, caddr_t data)
311 {
312 int error = 0;
313
314 switch (cmd) {
315 case (SIOCGETVIFCNT):
316 error = get_vif_cnt((struct sioc_vif_req *)data);
317 break;
318
319 case (SIOCGETSGCNT):
320 error = get_sg_cnt((struct sioc_sg_req *)data);
321 break;
322
323 default:
324 error = EINVAL;
325 break;
326 }
327 return error;
328 }
329
330 /*
331 * returns the packet, byte, rpf-failure count for the source group provided
332 */
333 static int
334 get_sg_cnt(struct sioc_sg_req *req)
335 {
336 int s;
337 struct mfc *rt;
338
339 s = splnet();
340 rt = mfc_find(req->src.s_addr, req->grp.s_addr);
341 splx(s);
342 if (rt == NULL) {
343 req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff;
344 return EADDRNOTAVAIL;
345 }
346 req->pktcnt = rt->mfc_pkt_cnt;
347 req->bytecnt = rt->mfc_byte_cnt;
348 req->wrong_if = rt->mfc_wrong_if;
349 return 0;
350 }
351
352 /*
353 * returns the input and output packet and byte counts on the vif provided
354 */
355 static int
356 get_vif_cnt(struct sioc_vif_req *req)
357 {
358 vifi_t vifi = req->vifi;
359
360 if (vifi >= numvifs)
361 return EINVAL;
362
363 req->icount = viftable[vifi].v_pkt_in;
364 req->ocount = viftable[vifi].v_pkt_out;
365 req->ibytes = viftable[vifi].v_bytes_in;
366 req->obytes = viftable[vifi].v_bytes_out;
367
368 return 0;
369 }
370
371 /*
372 * Enable multicast routing
373 */
374 static int
375 ip_mrouter_init(struct socket *so, int version)
376 {
377 if (mrtdebug)
378 log(LOG_DEBUG, "ip_mrouter_init: so_type = %d, pr_protocol = %d\n",
379 so->so_type, so->so_proto->pr_protocol);
380
381 if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_IGMP)
382 return EOPNOTSUPP;
383
384 if (version != 1)
385 return ENOPROTOOPT;
386
387 if (ip_mrouter != NULL)
388 return EADDRINUSE;
389
390 ip_mrouter = so;
391
392 bzero((caddr_t)mfctable, sizeof(mfctable));
393 bzero((caddr_t)nexpire, sizeof(nexpire));
394
395 pim_assert = 0;
396
397 expire_upcalls_ch = timeout(expire_upcalls, NULL, EXPIRE_TIMEOUT);
398
399 if (mrtdebug)
400 log(LOG_DEBUG, "ip_mrouter_init\n");
401
402 return 0;
403 }
404
405 /*
406 * Disable multicast routing
407 */
408 static int
409 X_ip_mrouter_done(void)
410 {
411 vifi_t vifi;
412 int i;
413 struct ifnet *ifp;
414 struct ifreq ifr;
415 struct mfc *rt;
416 struct rtdetq *rte;
417 int s;
418
419 s = splnet();
420
421 /*
422 * For each phyint in use, disable promiscuous reception of all IP
423 * multicasts.
424 */
425 for (vifi = 0; vifi < numvifs; vifi++) {
426 if (viftable[vifi].v_lcl_addr.s_addr != 0 &&
427 !(viftable[vifi].v_flags & VIFF_TUNNEL)) {
428 struct sockaddr_in *so = (struct sockaddr_in *)&(ifr.ifr_addr);
429
430 so->sin_len = sizeof(struct sockaddr_in);
431 so->sin_family = AF_INET;
432 so->sin_addr.s_addr = INADDR_ANY;
433 ifp = viftable[vifi].v_ifp;
434 if_allmulti(ifp, 0);
435 }
436 }
437 bzero((caddr_t)tbftable, sizeof(tbftable));
438 bzero((caddr_t)viftable, sizeof(viftable));
439 numvifs = 0;
440 pim_assert = 0;
441
442 untimeout(expire_upcalls, NULL, expire_upcalls_ch);
443
444 /*
445 * Free all multicast forwarding cache entries.
446 */
447 for (i = 0; i < MFCTBLSIZ; i++) {
448 for (rt = mfctable[i]; rt != NULL; ) {
449 struct mfc *nr = rt->mfc_next;
450
451 for (rte = rt->mfc_stall; rte != NULL; ) {
452 struct rtdetq *n = rte->next;
453
454 m_freem(rte->m);
455 free(rte, M_MRTABLE);
456 rte = n;
457 }
458 free(rt, M_MRTABLE);
459 rt = nr;
460 }
461 }
462
463 bzero((caddr_t)mfctable, sizeof(mfctable));
464
465 /*
466 * Reset de-encapsulation cache
467 */
468 last_encap_src = INADDR_ANY;
469 last_encap_vif = NULL;
470 if (encap_cookie) {
471 encap_detach(encap_cookie);
472 encap_cookie = NULL;
473 }
474
475 ip_mrouter = NULL;
476
477 splx(s);
478
479 if (mrtdebug)
480 log(LOG_DEBUG, "ip_mrouter_done\n");
481
482 return 0;
483 }
484
485 /*
486 * Set PIM assert processing global
487 */
488 static int
489 set_assert(int i)
490 {
491 if ((i != 1) && (i != 0))
492 return EINVAL;
493
494 pim_assert = i;
495
496 return 0;
497 }
498
499 /*
500 * Decide if a packet is from a tunnelled peer.
501 * Return 0 if not, 64 if so. XXX yuck.. 64 ???
502 */
503 static int
504 mroute_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
505 {
506 struct ip *ip = mtod(m, struct ip *);
507 int hlen = ip->ip_hl << 2;
508
509 /*
510 * don't claim the packet if it's not to a multicast destination or if
511 * we don't have an encapsulating tunnel with the source.
512 * Note: This code assumes that the remote site IP address
513 * uniquely identifies the tunnel (i.e., that this site has
514 * at most one tunnel with the remote site).
515 */
516 if (!IN_MULTICAST(ntohl(((struct ip *)((char *)ip+hlen))->ip_dst.s_addr)))
517 return 0;
518 if (ip->ip_src.s_addr != last_encap_src) {
519 struct vif *vifp = viftable;
520 struct vif *vife = vifp + numvifs;
521
522 last_encap_src = ip->ip_src.s_addr;
523 last_encap_vif = NULL;
524 for ( ; vifp < vife; ++vifp)
525 if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) {
526 if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT)) == VIFF_TUNNEL)
527 last_encap_vif = vifp;
528 break;
529 }
530 }
531 if (last_encap_vif == NULL) {
532 last_encap_src = INADDR_ANY;
533 return 0;
534 }
535 return 64;
536 }
537
538 /*
539 * De-encapsulate a packet and feed it back through ip input (this
540 * routine is called whenever IP gets a packet that mroute_encap_func()
541 * claimed).
542 */
543 static void
544 mroute_encap_input(struct mbuf *m, int off)
545 {
546 struct ip *ip = mtod(m, struct ip *);
547 int hlen = ip->ip_hl << 2;
548
549 if (hlen > sizeof(struct ip))
550 ip_stripoptions(m, (struct mbuf *) 0);
551 m->m_data += sizeof(struct ip);
552 m->m_len -= sizeof(struct ip);
553 m->m_pkthdr.len -= sizeof(struct ip);
554
555 m->m_pkthdr.rcvif = last_encap_vif->v_ifp;
556
557 (void) IF_HANDOFF(&ipintrq, m, NULL);
558 /*
559 * normally we would need a "schednetisr(NETISR_IP)"
560 * here but we were called by ip_input and it is going
561 * to loop back & try to dequeue the packet we just
562 * queued as soon as we return so we avoid the
563 * unnecessary software interrrupt.
564 */
565 }
566
567 extern struct domain inetdomain;
568 static struct protosw mroute_encap_protosw =
569 { SOCK_RAW, &inetdomain, IPPROTO_IPV4, PR_ATOMIC|PR_ADDR,
570 mroute_encap_input, 0, 0, rip_ctloutput,
571 0,
572 0, 0, 0, 0,
573 &rip_usrreqs
574 };
575
576 /*
577 * Add a vif to the vif table
578 */
579 static int
580 add_vif(struct vifctl *vifcp)
581 {
582 struct vif *vifp = viftable + vifcp->vifc_vifi;
583 struct sockaddr_in sin = {sizeof sin, AF_INET};
584 struct ifaddr *ifa;
585 struct ifnet *ifp;
586 int error, s;
587 struct tbf *v_tbf = tbftable + vifcp->vifc_vifi;
588
589 if (vifcp->vifc_vifi >= MAXVIFS)
590 return EINVAL;
591 if (vifp->v_lcl_addr.s_addr != INADDR_ANY)
592 return EADDRINUSE;
593 if (vifcp->vifc_lcl_addr.s_addr == INADDR_ANY)
594 return EADDRNOTAVAIL;
595
596 /* Find the interface with an address in AF_INET family */
597 sin.sin_addr = vifcp->vifc_lcl_addr;
598 ifa = ifa_ifwithaddr((struct sockaddr *)&sin);
599 if (ifa == NULL)
600 return EADDRNOTAVAIL;
601 ifp = ifa->ifa_ifp;
602
603 if (vifcp->vifc_flags & VIFF_TUNNEL) {
604 if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) {
605 /*
606 * An encapsulating tunnel is wanted. Tell
607 * mroute_encap_input() to start paying attention
608 * to encapsulated packets.
609 */
610 if (encap_cookie == NULL) {
611 encap_cookie = encap_attach_func(AF_INET, IPPROTO_IPV4,
612 mroute_encapcheck,
613 (struct protosw *)&mroute_encap_protosw, NULL);
614
615 if (encap_cookie == NULL) {
616 printf("ip_mroute: unable to attach encap\n");
617 return EIO; /* XXX */
618 }
619 for (s = 0; s < MAXVIFS; ++s) {
620 multicast_decap_if[s].if_name = "mdecap";
621 multicast_decap_if[s].if_unit = s;
622 }
623 }
624 /*
625 * Set interface to fake encapsulator interface
626 */
627 ifp = &multicast_decap_if[vifcp->vifc_vifi];
628 /*
629 * Prepare cached route entry
630 */
631 bzero(&vifp->v_route, sizeof(vifp->v_route));
632 } else {
633 log(LOG_ERR, "source routed tunnels not supported\n");
634 return EOPNOTSUPP;
635 }
636 } else { /* Make sure the interface supports multicast */
637 if ((ifp->if_flags & IFF_MULTICAST) == 0)
638 return EOPNOTSUPP;
639
640 /* Enable promiscuous reception of all IP multicasts from the if */
641 s = splnet();
642 error = if_allmulti(ifp, 1);
643 splx(s);
644 if (error)
645 return error;
646 }
647
648 s = splnet();
649 /* define parameters for the tbf structure */
650 vifp->v_tbf = v_tbf;
651 GET_TIME(vifp->v_tbf->tbf_last_pkt_t);
652 vifp->v_tbf->tbf_n_tok = 0;
653 vifp->v_tbf->tbf_q_len = 0;
654 vifp->v_tbf->tbf_max_q_len = MAXQSIZE;
655 vifp->v_tbf->tbf_q = vifp->v_tbf->tbf_t = NULL;
656
657 vifp->v_flags = vifcp->vifc_flags;
658 vifp->v_threshold = vifcp->vifc_threshold;
659 vifp->v_lcl_addr = vifcp->vifc_lcl_addr;
660 vifp->v_rmt_addr = vifcp->vifc_rmt_addr;
661 vifp->v_ifp = ifp;
662 /* scaling up here allows division by 1024 in critical code */
663 vifp->v_rate_limit= vifcp->vifc_rate_limit * 1024 / 1000;
664 vifp->v_rsvp_on = 0;
665 vifp->v_rsvpd = NULL;
666 /* initialize per vif pkt counters */
667 vifp->v_pkt_in = 0;
668 vifp->v_pkt_out = 0;
669 vifp->v_bytes_in = 0;
670 vifp->v_bytes_out = 0;
671 splx(s);
672
673 /* Adjust numvifs up if the vifi is higher than numvifs */
674 if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1;
675
676 if (mrtdebug)
677 log(LOG_DEBUG, "add_vif #%d, lcladdr %lx, %s %lx, thresh %x, rate %d\n",
678 vifcp->vifc_vifi,
679 (u_long)ntohl(vifcp->vifc_lcl_addr.s_addr),
680 (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask",
681 (u_long)ntohl(vifcp->vifc_rmt_addr.s_addr),
682 vifcp->vifc_threshold,
683 vifcp->vifc_rate_limit);
684
685 return 0;
686 }
687
688 /*
689 * Delete a vif from the vif table
690 */
691 static int
692 del_vif(vifi_t vifi)
693 {
694 struct vif *vifp;
695 int s;
696
697 if (vifi >= numvifs)
698 return EINVAL;
699 vifp = &viftable[vifi];
700 if (vifp->v_lcl_addr.s_addr == INADDR_ANY)
701 return EADDRNOTAVAIL;
702
703 s = splnet();
704
705 if (!(vifp->v_flags & VIFF_TUNNEL))
706 if_allmulti(vifp->v_ifp, 0);
707
708 if (vifp == last_encap_vif) {
709 last_encap_vif = NULL;
710 last_encap_src = INADDR_ANY;
711 }
712
713 /*
714 * Free packets queued at the interface
715 */
716 while (vifp->v_tbf->tbf_q) {
717 struct mbuf *m = vifp->v_tbf->tbf_q;
718
719 vifp->v_tbf->tbf_q = m->m_act;
720 m_freem(m);
721 }
722
723 bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf)));
724 bzero((caddr_t)vifp, sizeof (*vifp));
725
726 if (mrtdebug)
727 log(LOG_DEBUG, "del_vif %d, numvifs %d\n", vifi, numvifs);
728
729 /* Adjust numvifs down */
730 for (vifi = numvifs; vifi > 0; vifi--)
731 if (viftable[vifi-1].v_lcl_addr.s_addr != INADDR_ANY)
732 break;
733 numvifs = vifi;
734
735 splx(s);
736
737 return 0;
738 }
739
740 /*
741 * update an mfc entry without resetting counters and S,G addresses.
742 */
743 static void
744 update_mfc_params(struct mfc *rt, struct mfcctl *mfccp)
745 {
746 int i;
747
748 rt->mfc_parent = mfccp->mfcc_parent;
749 for (i = 0; i < numvifs; i++)
750 rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
751 }
752
753 /*
754 * fully initialize an mfc entry from the parameter.
755 */
756 static void
757 init_mfc_params(struct mfc *rt, struct mfcctl *mfccp)
758 {
759 rt->mfc_origin = mfccp->mfcc_origin;
760 rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp;
761
762 update_mfc_params(rt, mfccp);
763
764 /* initialize pkt counters per src-grp */
765 rt->mfc_pkt_cnt = 0;
766 rt->mfc_byte_cnt = 0;
767 rt->mfc_wrong_if = 0;
768 rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0;
769 }
770
771
772 /*
773 * Add an mfc entry
774 */
775 static int
776 add_mfc(struct mfcctl *mfccp)
777 {
778 struct mfc *rt;
779 u_long hash;
780 struct rtdetq *rte;
781 u_short nstl;
782 int s;
783
784 rt = mfc_find(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);
785
786 /* If an entry already exists, just update the fields */
787 if (rt) {
788 if (mrtdebug & DEBUG_MFC)
789 log(LOG_DEBUG,"add_mfc update o %lx g %lx p %x\n",
790 (u_long)ntohl(mfccp->mfcc_origin.s_addr),
791 (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
792 mfccp->mfcc_parent);
793
794 s = splnet();
795 update_mfc_params(rt, mfccp);
796 splx(s);
797 return 0;
798 }
799
800 /*
801 * Find the entry for which the upcall was made and update
802 */
803 s = splnet();
804 hash = MFCHASH(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);
805 for (rt = mfctable[hash], nstl = 0; rt; rt = rt->mfc_next) {
806
807 if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) &&
808 (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) &&
809 (rt->mfc_stall != NULL)) {
810
811 if (nstl++)
812 log(LOG_ERR, "add_mfc %s o %lx g %lx p %x dbx %p\n",
813 "multiple kernel entries",
814 (u_long)ntohl(mfccp->mfcc_origin.s_addr),
815 (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
816 mfccp->mfcc_parent, (void *)rt->mfc_stall);
817
818 if (mrtdebug & DEBUG_MFC)
819 log(LOG_DEBUG,"add_mfc o %lx g %lx p %x dbg %p\n",
820 (u_long)ntohl(mfccp->mfcc_origin.s_addr),
821 (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
822 mfccp->mfcc_parent, (void *)rt->mfc_stall);
823
824 init_mfc_params(rt, mfccp);
825
826 rt->mfc_expire = 0; /* Don't clean this guy up */
827 nexpire[hash]--;
828
829 /* free packets Qed at the end of this entry */
830 for (rte = rt->mfc_stall; rte != NULL; ) {
831 struct rtdetq *n = rte->next;
832
833 ip_mdq(rte->m, rte->ifp, rt, -1);
834 m_freem(rte->m);
835 free(rte, M_MRTABLE);
836 rte = n;
837 }
838 rt->mfc_stall = NULL;
839 }
840 }
841
842 /*
843 * It is possible that an entry is being inserted without an upcall
844 */
845 if (nstl == 0) {
846 if (mrtdebug & DEBUG_MFC)
847 log(LOG_DEBUG,"add_mfc no upcall h %lu o %lx g %lx p %x\n",
848 hash, (u_long)ntohl(mfccp->mfcc_origin.s_addr),
849 (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
850 mfccp->mfcc_parent);
851
852 for (rt = mfctable[hash]; rt != NULL; rt = rt->mfc_next) {
853 if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) &&
854 (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) {
855 init_mfc_params(rt, mfccp);
856 if (rt->mfc_expire)
857 nexpire[hash]--;
858 rt->mfc_expire = 0;
859 break; /* XXX */
860 }
861 }
862 if (rt == NULL) { /* no upcall, so make a new entry */
863 rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
864 if (rt == NULL) {
865 splx(s);
866 return ENOBUFS;
867 }
868
869 init_mfc_params(rt, mfccp);
870 rt->mfc_expire = 0;
871 rt->mfc_stall = NULL;
872
873 /* insert new entry at head of hash chain */
874 rt->mfc_next = mfctable[hash];
875 mfctable[hash] = rt;
876 }
877 }
878 splx(s);
879 return 0;
880 }
881
882 /*
883 * Delete an mfc entry
884 */
885 static int
886 del_mfc(struct mfcctl *mfccp)
887 {
888 struct in_addr origin;
889 struct in_addr mcastgrp;
890 struct mfc *rt;
891 struct mfc **nptr;
892 u_long hash;
893 int s;
894
895 origin = mfccp->mfcc_origin;
896 mcastgrp = mfccp->mfcc_mcastgrp;
897
898 if (mrtdebug & DEBUG_MFC)
899 log(LOG_DEBUG,"del_mfc orig %lx mcastgrp %lx\n",
900 (u_long)ntohl(origin.s_addr), (u_long)ntohl(mcastgrp.s_addr));
901
902 s = splnet();
903
904 hash = MFCHASH(origin.s_addr, mcastgrp.s_addr);
905 for (nptr = &mfctable[hash]; (rt = *nptr) != NULL; nptr = &rt->mfc_next)
906 if (origin.s_addr == rt->mfc_origin.s_addr &&
907 mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr &&
908 rt->mfc_stall == NULL)
909 break;
910 if (rt == NULL) {
911 splx(s);
912 return EADDRNOTAVAIL;
913 }
914
915 *nptr = rt->mfc_next;
916 free(rt, M_MRTABLE);
917
918 splx(s);
919
920 return 0;
921 }
922
923 /*
924 * Send a message to mrouted on the multicast routing socket
925 */
926 static int
927 socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src)
928 {
929 if (s) {
930 if (sbappendaddr(&s->so_rcv, (struct sockaddr *)src, mm, NULL) != 0) {
931 sorwakeup(s);
932 return 0;
933 }
934 }
935 m_freem(mm);
936 return -1;
937 }
938
939 /*
940 * IP multicast forwarding function. This function assumes that the packet
941 * pointed to by "ip" has arrived on (or is about to be sent to) the interface
942 * pointed to by "ifp", and the packet is to be relayed to other networks
943 * that have members of the packet's destination IP multicast group.
944 *
945 * The packet is returned unscathed to the caller, unless it is
946 * erroneous, in which case a non-zero return value tells the caller to
947 * discard it.
948 */
949
950 #define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */
951
952 static int
953 X_ip_mforward(struct ip *ip, struct ifnet *ifp,
954 struct mbuf *m, struct ip_moptions *imo)
955 {
956 struct mfc *rt;
957 int s;
958 vifi_t vifi;
959
960 if (mrtdebug & DEBUG_FORWARD)
961 log(LOG_DEBUG, "ip_mforward: src %lx, dst %lx, ifp %p\n",
962 (u_long)ntohl(ip->ip_src.s_addr), (u_long)ntohl(ip->ip_dst.s_addr),
963 (void *)ifp);
964
965 if (ip->ip_hl < (sizeof(struct ip) + TUNNEL_LEN) >> 2 ||
966 ((u_char *)(ip + 1))[1] != IPOPT_LSRR ) {
967 /*
968 * Packet arrived via a physical interface or
969 * an encapsulated tunnel.
970 */
971 } else {
972 /*
973 * Packet arrived through a source-route tunnel.
974 * Source-route tunnels are no longer supported.
975 */
976 static int last_log;
977 if (last_log != time_second) {
978 last_log = time_second;
979 log(LOG_ERR,
980 "ip_mforward: received source-routed packet from %lx\n",
981 (u_long)ntohl(ip->ip_src.s_addr));
982 }
983 return 1;
984 }
985
986 if ((imo) && ((vifi = imo->imo_multicast_vif) < numvifs)) {
987 if (ip->ip_ttl < 255)
988 ip->ip_ttl++; /* compensate for -1 in *_send routines */
989 if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
990 struct vif *vifp = viftable + vifi;
991
992 printf("Sending IPPROTO_RSVP from %lx to %lx on vif %d (%s%s%d)\n",
993 (long)ntohl(ip->ip_src.s_addr), (long)ntohl(ip->ip_dst.s_addr),
994 vifi,
995 (vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "",
996 vifp->v_ifp->if_name, vifp->v_ifp->if_unit);
997 }
998 return ip_mdq(m, ifp, NULL, vifi);
999 }
1000 if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
1001 printf("Warning: IPPROTO_RSVP from %lx to %lx without vif option\n",
1002 (long)ntohl(ip->ip_src.s_addr), (long)ntohl(ip->ip_dst.s_addr));
1003 if (!imo)
1004 printf("In fact, no options were specified at all\n");
1005 }
1006
1007 /*
1008 * Don't forward a packet with time-to-live of zero or one,
1009 * or a packet destined to a local-only group.
1010 */
1011 if (ip->ip_ttl <= 1 || ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP)
1012 return 0;
1013
1014 /*
1015 * Determine forwarding vifs from the forwarding cache table
1016 */
1017 s = splnet();
1018 ++mrtstat.mrts_mfc_lookups;
1019 rt = mfc_find(ip->ip_src.s_addr, ip->ip_dst.s_addr);
1020
1021 /* Entry exists, so forward if necessary */
1022 if (rt != NULL) {
1023 splx(s);
1024 return ip_mdq(m, ifp, rt, -1);
1025 } else {
1026 /*
1027 * If we don't have a route for packet's origin,
1028 * Make a copy of the packet & send message to routing daemon
1029 */
1030
1031 struct mbuf *mb0;
1032 struct rtdetq *rte;
1033 u_long hash;
1034 int hlen = ip->ip_hl << 2;
1035
1036 ++mrtstat.mrts_mfc_misses;
1037
1038 mrtstat.mrts_no_route++;
1039 if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC))
1040 log(LOG_DEBUG, "ip_mforward: no rte s %lx g %lx\n",
1041 (u_long)ntohl(ip->ip_src.s_addr),
1042 (u_long)ntohl(ip->ip_dst.s_addr));
1043
1044 /*
1045 * Allocate mbufs early so that we don't do extra work if we are
1046 * just going to fail anyway. Make sure to pullup the header so
1047 * that other people can't step on it.
1048 */
1049 rte = (struct rtdetq *)malloc((sizeof *rte), M_MRTABLE, M_NOWAIT);
1050 if (rte == NULL) {
1051 splx(s);
1052 return ENOBUFS;
1053 }
1054 mb0 = m_copy(m, 0, M_COPYALL);
1055 if (mb0 && (M_HASCL(mb0) || mb0->m_len < hlen))
1056 mb0 = m_pullup(mb0, hlen);
1057 if (mb0 == NULL) {
1058 free(rte, M_MRTABLE);
1059 splx(s);
1060 return ENOBUFS;
1061 }
1062
1063 /* is there an upcall waiting for this flow ? */
1064 hash = MFCHASH(ip->ip_src.s_addr, ip->ip_dst.s_addr);
1065 for (rt = mfctable[hash]; rt; rt = rt->mfc_next) {
1066 if ((ip->ip_src.s_addr == rt->mfc_origin.s_addr) &&
1067 (ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) &&
1068 (rt->mfc_stall != NULL))
1069 break;
1070 }
1071
1072 if (rt == NULL) {
1073 int i;
1074 struct igmpmsg *im;
1075 struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET };
1076 struct mbuf *mm;
1077
1078 /*
1079 * Locate the vifi for the incoming interface for this packet.
1080 * If none found, drop packet.
1081 */
1082 for (vifi=0; vifi<numvifs && viftable[vifi].v_ifp != ifp; vifi++)
1083 ;
1084 if (vifi >= numvifs) /* vif not found, drop packet */
1085 goto non_fatal;
1086
1087 /* no upcall, so make a new entry */
1088 rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
1089 if (rt == NULL)
1090 goto fail;
1091 /* Make a copy of the header to send to the user level process */
1092 mm = m_copy(mb0, 0, hlen);
1093 if (mm == NULL)
1094 goto fail1;
1095
1096 /*
1097 * Send message to routing daemon to install
1098 * a route into the kernel table
1099 */
1100
1101 im = mtod(mm, struct igmpmsg *);
1102 im->im_msgtype = IGMPMSG_NOCACHE;
1103 im->im_mbz = 0;
1104 im->im_vif = vifi;
1105
1106 mrtstat.mrts_upcalls++;
1107
1108 k_igmpsrc.sin_addr = ip->ip_src;
1109 if (socket_send(ip_mrouter, mm, &k_igmpsrc) < 0) {
1110 log(LOG_WARNING, "ip_mforward: ip_mrouter socket queue full\n");
1111 ++mrtstat.mrts_upq_sockfull;
1112 fail1:
1113 free(rt, M_MRTABLE);
1114 fail:
1115 free(rte, M_MRTABLE);
1116 m_freem(mb0);
1117 splx(s);
1118 return ENOBUFS;
1119 }
1120
1121 /* insert new entry at head of hash chain */
1122 rt->mfc_origin.s_addr = ip->ip_src.s_addr;
1123 rt->mfc_mcastgrp.s_addr = ip->ip_dst.s_addr;
1124 rt->mfc_expire = UPCALL_EXPIRE;
1125 nexpire[hash]++;
1126 for (i = 0; i < numvifs; i++)
1127 rt->mfc_ttls[i] = 0;
1128 rt->mfc_parent = -1;
1129
1130 /* link into table */
1131 rt->mfc_next = mfctable[hash];
1132 mfctable[hash] = rt;
1133 rt->mfc_stall = rte;
1134
1135 } else {
1136 /* determine if q has overflowed */
1137 int npkts = 0;
1138 struct rtdetq **p;
1139
1140 /*
1141 * XXX ouch! we need to append to the list, but we
1142 * only have a pointer to the front, so we have to
1143 * scan the entire list every time.
1144 */
1145 for (p = &rt->mfc_stall; *p != NULL; p = &(*p)->next)
1146 npkts++;
1147
1148 if (npkts > MAX_UPQ) {
1149 mrtstat.mrts_upq_ovflw++;
1150 non_fatal:
1151 free(rte, M_MRTABLE);
1152 m_freem(mb0);
1153 splx(s);
1154 return 0;
1155 }
1156
1157 /* Add this entry to the end of the queue */
1158 *p = rte;
1159 }
1160
1161 rte->m = mb0;
1162 rte->ifp = ifp;
1163 rte->next = NULL;
1164
1165 splx(s);
1166
1167 return 0;
1168 }
1169 }
1170
1171 /*
1172 * Clean up the cache entry if upcall is not serviced
1173 */
1174 static void
1175 expire_upcalls(void *unused)
1176 {
1177 struct rtdetq *rte;
1178 struct mfc *mfc, **nptr;
1179 int i;
1180 int s;
1181
1182 s = splnet();
1183 for (i = 0; i < MFCTBLSIZ; i++) {
1184 if (nexpire[i] == 0)
1185 continue;
1186 nptr = &mfctable[i];
1187 for (mfc = *nptr; mfc != NULL; mfc = *nptr) {
1188 /*
1189 * Skip real cache entries
1190 * Make sure it wasn't marked to not expire (shouldn't happen)
1191 * If it expires now
1192 */
1193 if (mfc->mfc_stall != NULL && mfc->mfc_expire != 0 &&
1194 --mfc->mfc_expire == 0) {
1195 if (mrtdebug & DEBUG_EXPIRE)
1196 log(LOG_DEBUG, "expire_upcalls: expiring (%lx %lx)\n",
1197 (u_long)ntohl(mfc->mfc_origin.s_addr),
1198 (u_long)ntohl(mfc->mfc_mcastgrp.s_addr));
1199 /*
1200 * drop all the packets
1201 * free the mbuf with the pkt, if, timing info
1202 */
1203 for (rte = mfc->mfc_stall; rte; ) {
1204 struct rtdetq *n = rte->next;
1205
1206 m_freem(rte->m);
1207 free(rte, M_MRTABLE);
1208 rte = n;
1209 }
1210 ++mrtstat.mrts_cache_cleanups;
1211 nexpire[i]--;
1212
1213 *nptr = mfc->mfc_next;
1214 free(mfc, M_MRTABLE);
1215 } else {
1216 nptr = &mfc->mfc_next;
1217 }
1218 }
1219 }
1220 splx(s);
1221 expire_upcalls_ch = timeout(expire_upcalls, NULL, EXPIRE_TIMEOUT);
1222 }
1223
1224 /*
1225 * Packet forwarding routine once entry in the cache is made
1226 */
1227 static int
1228 ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, vifi_t xmt_vif)
1229 {
1230 struct ip *ip = mtod(m, struct ip *);
1231 vifi_t vifi;
1232 int plen = ip->ip_len;
1233
1234 /*
1235 * Macro to send packet on vif. Since RSVP packets don't get counted on
1236 * input, they shouldn't get counted on output, so statistics keeping is
1237 * separate.
1238 */
1239 #define MC_SEND(ip,vifp,m) { \
1240 if ((vifp)->v_flags & VIFF_TUNNEL) \
1241 encap_send((ip), (vifp), (m)); \
1242 else \
1243 phyint_send((ip), (vifp), (m)); \
1244 }
1245
1246 /*
1247 * If xmt_vif is not -1, send on only the requested vif.
1248 *
1249 * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.)
1250 */
1251 if (xmt_vif < numvifs) {
1252 MC_SEND(ip, viftable + xmt_vif, m);
1253 return 1;
1254 }
1255
1256 /*
1257 * Don't forward if it didn't arrive from the parent vif for its origin.
1258 */
1259 vifi = rt->mfc_parent;
1260 if ((vifi >= numvifs) || (viftable[vifi].v_ifp != ifp)) {
1261 /* came in the wrong interface */
1262 if (mrtdebug & DEBUG_FORWARD)
1263 log(LOG_DEBUG, "wrong if: ifp %p vifi %d vififp %p\n",
1264 (void *)ifp, vifi, (void *)viftable[vifi].v_ifp);
1265 ++mrtstat.mrts_wrong_if;
1266 ++rt->mfc_wrong_if;
1267 /*
1268 * If we are doing PIM assert processing, and we are forwarding
1269 * packets on this interface, and it is a broadcast medium
1270 * interface (and not a tunnel), send a message to the routing daemon.
1271 */
1272 if (pim_assert && rt->mfc_ttls[vifi] &&
1273 (ifp->if_flags & IFF_BROADCAST) &&
1274 !(viftable[vifi].v_flags & VIFF_TUNNEL)) {
1275 struct timeval now;
1276 u_long delta;
1277
1278 GET_TIME(now);
1279
1280 TV_DELTA(rt->mfc_last_assert, now, delta);
1281
1282 if (delta > ASSERT_MSG_TIME) {
1283 struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET };
1284 struct igmpmsg *im;
1285 int hlen = ip->ip_hl << 2;
1286 struct mbuf *mm = m_copy(m, 0, hlen);
1287
1288 if (mm && (M_HASCL(mm) || mm->m_len < hlen))
1289 mm = m_pullup(mm, hlen);
1290 if (mm == NULL)
1291 return ENOBUFS;
1292
1293 rt->mfc_last_assert = now;
1294
1295 im = mtod(mm, struct igmpmsg *);
1296 im->im_msgtype = IGMPMSG_WRONGVIF;
1297 im->im_mbz = 0;
1298 im->im_vif = vifi;
1299
1300 k_igmpsrc.sin_addr = im->im_src;
1301
1302 if (socket_send(ip_mrouter, mm, &k_igmpsrc) < 0) {
1303 log(LOG_WARNING,
1304 "ip_mforward: ip_mrouter socket queue full\n");
1305 ++mrtstat.mrts_upq_sockfull;
1306 return ENOBUFS;
1307 }
1308 }
1309 }
1310 return 0;
1311 }
1312
1313 /* If I sourced this packet, it counts as output, else it was input. */
1314 if (ip->ip_src.s_addr == viftable[vifi].v_lcl_addr.s_addr) {
1315 viftable[vifi].v_pkt_out++;
1316 viftable[vifi].v_bytes_out += plen;
1317 } else {
1318 viftable[vifi].v_pkt_in++;
1319 viftable[vifi].v_bytes_in += plen;
1320 }
1321 rt->mfc_pkt_cnt++;
1322 rt->mfc_byte_cnt += plen;
1323
1324 /*
1325 * For each vif, decide if a copy of the packet should be forwarded.
1326 * Forward if:
1327 * - the ttl exceeds the vif's threshold
1328 * - there are group members downstream on interface
1329 */
1330 for (vifi = 0; vifi < numvifs; vifi++)
1331 if ((rt->mfc_ttls[vifi] > 0) && (ip->ip_ttl > rt->mfc_ttls[vifi])) {
1332 viftable[vifi].v_pkt_out++;
1333 viftable[vifi].v_bytes_out += plen;
1334 MC_SEND(ip, viftable+vifi, m);
1335 }
1336
1337 return 0;
1338 }
1339
1340 /*
1341 * check if a vif number is legal/ok. This is used by ip_output.
1342 */
1343 static int
1344 X_legal_vif_num(int vif)
1345 {
1346 return (vif >= 0 && vif < numvifs);
1347 }
1348
1349 /*
1350 * Return the local address used by this vif
1351 */
1352 static u_long
1353 X_ip_mcast_src(int vifi)
1354 {
1355 if (vifi >= 0 && vifi < numvifs)
1356 return viftable[vifi].v_lcl_addr.s_addr;
1357 else
1358 return INADDR_ANY;
1359 }
1360
1361 static void
1362 phyint_send(struct ip *ip, struct vif *vifp, struct mbuf *m)
1363 {
1364 struct mbuf *mb_copy;
1365 int hlen = ip->ip_hl << 2;
1366
1367 /*
1368 * Make a new reference to the packet; make sure that
1369 * the IP header is actually copied, not just referenced,
1370 * so that ip_output() only scribbles on the copy.
1371 */
1372 mb_copy = m_copy(m, 0, M_COPYALL);
1373 if (mb_copy && (M_HASCL(mb_copy) || mb_copy->m_len < hlen))
1374 mb_copy = m_pullup(mb_copy, hlen);
1375 if (mb_copy == NULL)
1376 return;
1377
1378 if (vifp->v_rate_limit == 0)
1379 tbf_send_packet(vifp, mb_copy);
1380 else
1381 tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len);
1382 }
1383
1384 static void
1385 encap_send(struct ip *ip, struct vif *vifp, struct mbuf *m)
1386 {
1387 struct mbuf *mb_copy;
1388 struct ip *ip_copy;
1389 int i, len = ip->ip_len;
1390
1391 /*
1392 * XXX: take care of delayed checksums.
1393 * XXX: if network interfaces are capable of computing checksum for
1394 * encapsulated multicast data packets, we need to reconsider this.
1395 */
1396 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1397 in_delayed_cksum(m);
1398 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1399 }
1400
1401 /*
1402 * copy the old packet & pullup its IP header into the
1403 * new mbuf so we can modify it. Try to fill the new
1404 * mbuf since if we don't the ethernet driver will.
1405 */
1406 MGETHDR(mb_copy, M_DONTWAIT, MT_HEADER);
1407 if (mb_copy == NULL)
1408 return;
1409 #ifdef MAC
1410 mac_create_mbuf_multicast_encap(m, vifp->v_ifp, mb_copy);
1411 #endif
1412 mb_copy->m_data += max_linkhdr;
1413 mb_copy->m_len = sizeof(multicast_encap_iphdr);
1414
1415 if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) {
1416 m_freem(mb_copy);
1417 return;
1418 }
1419 i = MHLEN - M_LEADINGSPACE(mb_copy);
1420 if (i > len)
1421 i = len;
1422 mb_copy = m_pullup(mb_copy, i);
1423 if (mb_copy == NULL)
1424 return;
1425 mb_copy->m_pkthdr.len = len + sizeof(multicast_encap_iphdr);
1426
1427 /*
1428 * fill in the encapsulating IP header.
1429 */
1430 ip_copy = mtod(mb_copy, struct ip *);
1431 *ip_copy = multicast_encap_iphdr;
1432 #ifdef RANDOM_IP_ID
1433 ip_copy->ip_id = ip_randomid();
1434 #else
1435 ip_copy->ip_id = htons(ip_id++);
1436 #endif
1437 ip_copy->ip_len += len;
1438 ip_copy->ip_src = vifp->v_lcl_addr;
1439 ip_copy->ip_dst = vifp->v_rmt_addr;
1440
1441 /*
1442 * turn the encapsulated IP header back into a valid one.
1443 */
1444 ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr));
1445 --ip->ip_ttl;
1446 ip->ip_len = htons(ip->ip_len);
1447 ip->ip_off = htons(ip->ip_off);
1448 ip->ip_sum = 0;
1449 mb_copy->m_data += sizeof(multicast_encap_iphdr);
1450 ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2);
1451 mb_copy->m_data -= sizeof(multicast_encap_iphdr);
1452
1453 if (vifp->v_rate_limit == 0)
1454 tbf_send_packet(vifp, mb_copy);
1455 else
1456 tbf_control(vifp, mb_copy, ip, ip_copy->ip_len);
1457 }
1458
1459 /*
1460 * Token bucket filter module
1461 */
1462
1463 static void
1464 tbf_control(struct vif *vifp, struct mbuf *m, struct ip *ip, u_long p_len)
1465 {
1466 struct tbf *t = vifp->v_tbf;
1467
1468 if (p_len > MAX_BKT_SIZE) { /* drop if packet is too large */
1469 mrtstat.mrts_pkt2large++;
1470 m_freem(m);
1471 return;
1472 }
1473
1474 tbf_update_tokens(vifp);
1475
1476 if (t->tbf_q_len == 0) { /* queue empty... */
1477 if (p_len <= t->tbf_n_tok) { /* send packet if enough tokens */
1478 t->tbf_n_tok -= p_len;
1479 tbf_send_packet(vifp, m);
1480 } else { /* no, queue packet and try later */
1481 tbf_queue(vifp, m);
1482 timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS);
1483 }
1484 } else if (t->tbf_q_len < t->tbf_max_q_len) {
1485 /* finite queue length, so queue pkts and process queue */
1486 tbf_queue(vifp, m);
1487 tbf_process_q(vifp);
1488 } else {
1489 /* queue full, try to dq and queue and process */
1490 if (!tbf_dq_sel(vifp, ip)) {
1491 mrtstat.mrts_q_overflow++;
1492 m_freem(m);
1493 } else {
1494 tbf_queue(vifp, m);
1495 tbf_process_q(vifp);
1496 }
1497 }
1498 }
1499
1500 /*
1501 * adds a packet to the queue at the interface
1502 */
1503 static void
1504 tbf_queue(struct vif *vifp, struct mbuf *m)
1505 {
1506 int s = splnet();
1507 struct tbf *t = vifp->v_tbf;
1508
1509 if (t->tbf_t == NULL) /* Queue was empty */
1510 t->tbf_q = m;
1511 else /* Insert at tail */
1512 t->tbf_t->m_act = m;
1513
1514 t->tbf_t = m; /* Set new tail pointer */
1515
1516 #ifdef DIAGNOSTIC
1517 /* Make sure we didn't get fed a bogus mbuf */
1518 if (m->m_act)
1519 panic("tbf_queue: m_act");
1520 #endif
1521 m->m_act = NULL;
1522
1523 t->tbf_q_len++;
1524
1525 splx(s);
1526 }
1527
1528 /*
1529 * processes the queue at the interface
1530 */
1531 static void
1532 tbf_process_q(struct vif *vifp)
1533 {
1534 int s = splnet();
1535 struct tbf *t = vifp->v_tbf;
1536
1537 /* loop through the queue at the interface and send as many packets
1538 * as possible
1539 */
1540 while (t->tbf_q_len > 0) {
1541 struct mbuf *m = t->tbf_q;
1542 int len = mtod(m, struct ip *)->ip_len;
1543
1544 /* determine if the packet can be sent */
1545 if (len > t->tbf_n_tok) /* not enough tokens, we are done */
1546 break;
1547 /* ok, reduce no of tokens, dequeue and send the packet. */
1548 t->tbf_n_tok -= len;
1549
1550 t->tbf_q = m->m_act;
1551 if (--t->tbf_q_len == 0)
1552 t->tbf_t = NULL;
1553
1554 m->m_act = NULL;
1555 tbf_send_packet(vifp, m);
1556 }
1557 splx(s);
1558 }
1559
1560 static void
1561 tbf_reprocess_q(void *xvifp)
1562 {
1563 struct vif *vifp = xvifp;
1564
1565 if (ip_mrouter == NULL)
1566 return;
1567 tbf_update_tokens(vifp);
1568 tbf_process_q(vifp);
1569 if (vifp->v_tbf->tbf_q_len)
1570 timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS);
1571 }
1572
1573 /* function that will selectively discard a member of the queue
1574 * based on the precedence value and the priority
1575 */
1576 static int
1577 tbf_dq_sel(struct vif *vifp, struct ip *ip)
1578 {
1579 int s = splnet();
1580 u_int p;
1581 struct mbuf *m, *last;
1582 struct mbuf **np;
1583 struct tbf *t = vifp->v_tbf;
1584
1585 p = priority(vifp, ip);
1586
1587 np = &t->tbf_q;
1588 last = NULL;
1589 while ((m = *np) != NULL) {
1590 if (p > priority(vifp, mtod(m, struct ip *))) {
1591 *np = m->m_act;
1592 /* If we're removing the last packet, fix the tail pointer */
1593 if (m == t->tbf_t)
1594 t->tbf_t = last;
1595 m_freem(m);
1596 /* It's impossible for the queue to be empty, but check anyways. */
1597 if (--t->tbf_q_len == 0)
1598 t->tbf_t = NULL;
1599 splx(s);
1600 mrtstat.mrts_drop_sel++;
1601 return 1;
1602 }
1603 np = &m->m_act;
1604 last = m;
1605 }
1606 splx(s);
1607 return 0;
1608 }
1609
1610 static void
1611 tbf_send_packet(struct vif *vifp, struct mbuf *m)
1612 {
1613 int s = splnet();
1614
1615 if (vifp->v_flags & VIFF_TUNNEL) /* If tunnel options */
1616 ip_output(m, NULL, &vifp->v_route, IP_FORWARDING, NULL, NULL);
1617 else {
1618 struct ip_moptions imo;
1619 int error;
1620 static struct route ro; /* XXX check this */
1621
1622 imo.imo_multicast_ifp = vifp->v_ifp;
1623 imo.imo_multicast_ttl = mtod(m, struct ip *)->ip_ttl - 1;
1624 imo.imo_multicast_loop = 1;
1625 imo.imo_multicast_vif = -1;
1626
1627 /*
1628 * Re-entrancy should not be a problem here, because
1629 * the packets that we send out and are looped back at us
1630 * should get rejected because they appear to come from
1631 * the loopback interface, thus preventing looping.
1632 */
1633 error = ip_output(m, NULL, &ro, IP_FORWARDING, &imo, NULL);
1634
1635 if (mrtdebug & DEBUG_XMIT)
1636 log(LOG_DEBUG, "phyint_send on vif %d err %d\n",
1637 (int)(vifp - viftable), error);
1638 }
1639 splx(s);
1640 }
1641
1642 /* determine the current time and then
1643 * the elapsed time (between the last time and time now)
1644 * in milliseconds & update the no. of tokens in the bucket
1645 */
1646 static void
1647 tbf_update_tokens(struct vif *vifp)
1648 {
1649 struct timeval tp;
1650 u_long tm;
1651 int s = splnet();
1652 struct tbf *t = vifp->v_tbf;
1653
1654 GET_TIME(tp);
1655
1656 TV_DELTA(tp, t->tbf_last_pkt_t, tm);
1657
1658 /*
1659 * This formula is actually
1660 * "time in seconds" * "bytes/second".
1661 *
1662 * (tm / 1000000) * (v_rate_limit * 1000 * (1000/1024) / 8)
1663 *
1664 * The (1000/1024) was introduced in add_vif to optimize
1665 * this divide into a shift.
1666 */
1667 t->tbf_n_tok += tm * vifp->v_rate_limit / 1024 / 8;
1668 t->tbf_last_pkt_t = tp;
1669
1670 if (t->tbf_n_tok > MAX_BKT_SIZE)
1671 t->tbf_n_tok = MAX_BKT_SIZE;
1672
1673 splx(s);
1674 }
1675
1676 static int
1677 priority(struct vif *vifp, struct ip *ip)
1678 {
1679 int prio = 50; /* the lowest priority -- default case */
1680
1681 /* temporary hack; may add general packet classifier some day */
1682
1683 /*
1684 * The UDP port space is divided up into four priority ranges:
1685 * [0, 16384) : unclassified - lowest priority
1686 * [16384, 32768) : audio - highest priority
1687 * [32768, 49152) : whiteboard - medium priority
1688 * [49152, 65536) : video - low priority
1689 *
1690 * Everything else gets lowest priority.
1691 */
1692 if (ip->ip_p == IPPROTO_UDP) {
1693 struct udphdr *udp = (struct udphdr *)(((char *)ip) + (ip->ip_hl << 2));
1694 switch (ntohs(udp->uh_dport) & 0xc000) {
1695 case 0x4000:
1696 prio = 70;
1697 break;
1698 case 0x8000:
1699 prio = 60;
1700 break;
1701 case 0xc000:
1702 prio = 55;
1703 break;
1704 }
1705 }
1706 return prio;
1707 }
1708
1709 /*
1710 * End of token bucket filter modifications
1711 */
1712
1713 static int
1714 X_ip_rsvp_vif(struct socket *so, struct sockopt *sopt)
1715 {
1716 int error, vifi, s;
1717
1718 if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
1719 return EOPNOTSUPP;
1720
1721 error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi);
1722 if (error)
1723 return error;
1724
1725 s = splnet();
1726
1727 if (vifi < 0 || vifi >= numvifs) { /* Error if vif is invalid */
1728 splx(s);
1729 return EADDRNOTAVAIL;
1730 }
1731
1732 if (sopt->sopt_name == IP_RSVP_VIF_ON) {
1733 /* Check if socket is available. */
1734 if (viftable[vifi].v_rsvpd != NULL) {
1735 splx(s);
1736 return EADDRINUSE;
1737 }
1738
1739 viftable[vifi].v_rsvpd = so;
1740 /* This may seem silly, but we need to be sure we don't over-increment
1741 * the RSVP counter, in case something slips up.
1742 */
1743 if (!viftable[vifi].v_rsvp_on) {
1744 viftable[vifi].v_rsvp_on = 1;
1745 rsvp_on++;
1746 }
1747 } else { /* must be VIF_OFF */
1748 /*
1749 * XXX as an additional consistency check, one could make sure
1750 * that viftable[vifi].v_rsvpd == so, otherwise passing so as
1751 * first parameter is pretty useless.
1752 */
1753 viftable[vifi].v_rsvpd = NULL;
1754 /*
1755 * This may seem silly, but we need to be sure we don't over-decrement
1756 * the RSVP counter, in case something slips up.
1757 */
1758 if (viftable[vifi].v_rsvp_on) {
1759 viftable[vifi].v_rsvp_on = 0;
1760 rsvp_on--;
1761 }
1762 }
1763 splx(s);
1764 return 0;
1765 }
1766
1767 static void
1768 X_ip_rsvp_force_done(struct socket *so)
1769 {
1770 int vifi;
1771 int s;
1772
1773 /* Don't bother if it is not the right type of socket. */
1774 if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
1775 return;
1776
1777 s = splnet();
1778
1779 /* The socket may be attached to more than one vif...this
1780 * is perfectly legal.
1781 */
1782 for (vifi = 0; vifi < numvifs; vifi++) {
1783 if (viftable[vifi].v_rsvpd == so) {
1784 viftable[vifi].v_rsvpd = NULL;
1785 /* This may seem silly, but we need to be sure we don't
1786 * over-decrement the RSVP counter, in case something slips up.
1787 */
1788 if (viftable[vifi].v_rsvp_on) {
1789 viftable[vifi].v_rsvp_on = 0;
1790 rsvp_on--;
1791 }
1792 }
1793 }
1794
1795 splx(s);
1796 }
1797
1798 static void
1799 X_rsvp_input(struct mbuf *m, int off)
1800 {
1801 int vifi;
1802 struct ip *ip = mtod(m, struct ip *);
1803 struct sockaddr_in rsvp_src = { sizeof rsvp_src, AF_INET };
1804 int s;
1805 struct ifnet *ifp;
1806
1807 if (rsvpdebug)
1808 printf("rsvp_input: rsvp_on %d\n",rsvp_on);
1809
1810 /* Can still get packets with rsvp_on = 0 if there is a local member
1811 * of the group to which the RSVP packet is addressed. But in this
1812 * case we want to throw the packet away.
1813 */
1814 if (!rsvp_on) {
1815 m_freem(m);
1816 return;
1817 }
1818
1819 s = splnet();
1820
1821 if (rsvpdebug)
1822 printf("rsvp_input: check vifs\n");
1823
1824 #ifdef DIAGNOSTIC
1825 if (!(m->m_flags & M_PKTHDR))
1826 panic("rsvp_input no hdr");
1827 #endif
1828
1829 ifp = m->m_pkthdr.rcvif;
1830 /* Find which vif the packet arrived on. */
1831 for (vifi = 0; vifi < numvifs; vifi++)
1832 if (viftable[vifi].v_ifp == ifp)
1833 break;
1834
1835 if (vifi == numvifs || viftable[vifi].v_rsvpd == NULL) {
1836 /*
1837 * If the old-style non-vif-associated socket is set,
1838 * then use it. Otherwise, drop packet since there
1839 * is no specific socket for this vif.
1840 */
1841 if (ip_rsvpd != NULL) {
1842 if (rsvpdebug)
1843 printf("rsvp_input: Sending packet up old-style socket\n");
1844 rip_input(m, off); /* xxx */
1845 } else {
1846 if (rsvpdebug && vifi == numvifs)
1847 printf("rsvp_input: Can't find vif for packet.\n");
1848 else if (rsvpdebug && viftable[vifi].v_rsvpd == NULL)
1849 printf("rsvp_input: No socket defined for vif %d\n",vifi);
1850 m_freem(m);
1851 }
1852 splx(s);
1853 return;
1854 }
1855 rsvp_src.sin_addr = ip->ip_src;
1856
1857 if (rsvpdebug && m)
1858 printf("rsvp_input: m->m_len = %d, sbspace() = %ld\n",
1859 m->m_len,sbspace(&(viftable[vifi].v_rsvpd->so_rcv)));
1860
1861 if (socket_send(viftable[vifi].v_rsvpd, m, &rsvp_src) < 0) {
1862 if (rsvpdebug)
1863 printf("rsvp_input: Failed to append to socket\n");
1864 } else {
1865 if (rsvpdebug)
1866 printf("rsvp_input: send packet up\n");
1867 }
1868
1869 splx(s);
1870 }
1871
1872 static int
1873 ip_mroute_modevent(module_t mod, int type, void *unused)
1874 {
1875 int s;
1876
1877 switch (type) {
1878 case MOD_LOAD:
1879 s = splnet();
1880 /* XXX Protect against multiple loading */
1881 ip_mcast_src = X_ip_mcast_src;
1882 ip_mforward = X_ip_mforward;
1883 ip_mrouter_done = X_ip_mrouter_done;
1884 ip_mrouter_get = X_ip_mrouter_get;
1885 ip_mrouter_set = X_ip_mrouter_set;
1886 ip_rsvp_force_done = X_ip_rsvp_force_done;
1887 ip_rsvp_vif = X_ip_rsvp_vif;
1888 legal_vif_num = X_legal_vif_num;
1889 mrt_ioctl = X_mrt_ioctl;
1890 rsvp_input_p = X_rsvp_input;
1891 splx(s);
1892 break;
1893
1894 case MOD_UNLOAD:
1895 if (ip_mrouter)
1896 return EINVAL;
1897
1898 s = splnet();
1899 ip_mcast_src = NULL;
1900 ip_mforward = NULL;
1901 ip_mrouter_done = NULL;
1902 ip_mrouter_get = NULL;
1903 ip_mrouter_set = NULL;
1904 ip_rsvp_force_done = NULL;
1905 ip_rsvp_vif = NULL;
1906 legal_vif_num = NULL;
1907 mrt_ioctl = NULL;
1908 rsvp_input_p = NULL;
1909 splx(s);
1910 break;
1911 }
1912 return 0;
1913 }
1914
1915 static moduledata_t ip_mroutemod = {
1916 "ip_mroute",
1917 ip_mroute_modevent,
1918 0
1919 };
1920 DECLARE_MODULE(ip_mroute, ip_mroutemod, SI_SUB_PSEUDO, SI_ORDER_ANY);
Cache object: 33e093351dfccc52823d2885b6d7ab37
|