fxr.watson.org: FREEBSD-4-STABLE sys/netinet/ip

FreeBSD/Linux Kernel Cross Reference
sys/netinet/ip_mroute.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10

1 /* 2 * IP multicast forwarding procedures 3 * 4 * Written by David Waitzman, BBN Labs, August 1988. 5 * Modified by Steve Deering, Stanford, February 1989. 6 * Modified by Mark J. Steiglitz, Stanford, May, 1991 7 * Modified by Van Jacobson, LBL, January 1993 8 * Modified by Ajit Thyagarajan, PARC, August 1993 9 * Modified by Bill Fenner, PARC, April 1995 10 * Modified by Ahmed Helmy, SGI, June 1996 11 * Modified by George Edmond Eddy (Rusty), ISI, February 1998 12 * Modified by Pavlin Radoslavov, USC/ISI, May 1998, August 1999, October 2000 13 * Modified by Hitoshi Asaeda, WIDE, August 2000 14 * Modified by Pavlin Radoslavov, ICSI, October 2002 15 * 16 * MROUTING Revision: 3.5 17 * and PIM-SMv2 and PIM-DM support, advanced API support, 18 * bandwidth metering and signaling 19 * 20 * $FreeBSD$ 21 */ 22 23 #include "opt_mrouting.h" 24 #include "opt_random_ip_id.h" 25 26 #ifdef PIM 27 #define _PIM_VT 1 28 #endif 29 30 #include <sys/param.h> 31 #include <sys/kernel.h> 32 #include <sys/malloc.h> 33 #include <sys/mbuf.h> 34 #include <sys/protosw.h> 35 #include <sys/socket.h> 36 #include <sys/socketvar.h> 37 #include <sys/sockio.h> 38 #include <sys/sysctl.h> 39 #include <sys/syslog.h> 40 #include <sys/systm.h> 41 #include <sys/time.h> 42 #include <net/if.h> 43 #include <net/route.h> 44 #include <netinet/in.h> 45 #include <netinet/igmp.h> 46 #include <netinet/in_systm.h> 47 #include <netinet/in_var.h> 48 #include <netinet/ip.h> 49 #include <netinet/ip_mroute.h> 50 #include <netinet/ip_var.h> 51 #ifdef PIM 52 #include <netinet/pim.h> 53 #include <netinet/pim_var.h> 54 #endif 55 #include <netinet/udp.h> 56 #include <machine/in_cksum.h> 57 58 /* 59 * Control debugging code for rsvp and multicast routing code. 60 * Can only set them with the debugger. 61 */ 62 static u_int rsvpdebug; /* non-zero enables debugging */ 63 64 static u_int mrtdebug; /* any set of the flags below */ 65 66 #define DEBUG_MFC 0x02 67 #define DEBUG_FORWARD 0x04 68 #define DEBUG_EXPIRE 0x08 69 #define DEBUG_XMIT 0x10 70 #define DEBUG_PIM 0x20 71 72 #define VIFI_INVALID ((vifi_t) -1) 73 74 #define M_HASCL(m) ((m)->m_flags & M_EXT) 75 76 static MALLOC_DEFINE(M_MRTABLE, "mroutetbl", "multicast routing tables"); 77 78 static struct mrtstat mrtstat; 79 SYSCTL_STRUCT(_net_inet_ip, OID_AUTO, mrtstat, CTLFLAG_RW, 80 &mrtstat, mrtstat, 81 "Multicast Routing Statistics (struct mrtstat, netinet/ip_mroute.h)"); 82 83 static struct mfc *mfctable[MFCTBLSIZ]; 84 SYSCTL_OPAQUE(_net_inet_ip, OID_AUTO, mfctable, CTLFLAG_RD, 85 &mfctable, sizeof(mfctable), "S,*mfc[MFCTBLSIZ]", 86 "Multicast Forwarding Table (struct *mfc[MFCTBLSIZ], netinet/ip_mroute.h)"); 87 88 static struct vif viftable[MAXVIFS]; 89 SYSCTL_OPAQUE(_net_inet_ip, OID_AUTO, viftable, CTLFLAG_RD, 90 &viftable, sizeof(viftable), "S,vif[MAXVIFS]", 91 "Multicast Virtual Interfaces (struct vif[MAXVIFS], netinet/ip_mroute.h)"); 92 93 static u_char nexpire[MFCTBLSIZ]; 94 95 static struct callout_handle expire_upcalls_ch; 96 97 #define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */ 98 #define UPCALL_EXPIRE 6 /* number of timeouts */ 99 100 /* 101 * Define the token bucket filter structures 102 * tbftable -> each vif has one of these for storing info 103 */ 104 105 static struct tbf tbftable[MAXVIFS]; 106 #define TBF_REPROCESS (hz / 100) /* 100x / second */ 107 108 /* 109 * 'Interfaces' associated with decapsulator (so we can tell 110 * packets that went through it from ones that get reflected 111 * by a broken gateway). These interfaces are never linked into 112 * the system ifnet list & no routes point to them. I.e., packets 113 * can't be sent this way. They only exist as a placeholder for 114 * multicast source verification. 115 */ 116 static struct ifnet multicast_decap_if[MAXVIFS]; 117 118 #define ENCAP_TTL 64 119 #define ENCAP_PROTO IPPROTO_IPIP /* 4 */ 120 121 /* prototype IP hdr for encapsulated packets */ 122 static struct ip multicast_encap_iphdr = { 123 #if BYTE_ORDER == LITTLE_ENDIAN 124 sizeof(struct ip) >> 2, IPVERSION, 125 #else 126 IPVERSION, sizeof(struct ip) >> 2, 127 #endif 128 0, /* tos */ 129 sizeof(struct ip), /* total length */ 130 0, /* id */ 131 0, /* frag offset */ 132 ENCAP_TTL, ENCAP_PROTO, 133 0, /* checksum */ 134 }; 135 136 /* 137 * Bandwidth meter variables and constants 138 */ 139 static MALLOC_DEFINE(M_BWMETER, "bwmeter", "multicast upcall bw meters"); 140 /* 141 * Pending timeouts are stored in a hash table, the key being the 142 * expiration time. Periodically, the entries are analysed and processed. 143 */ 144 #define BW_METER_BUCKETS 1024 145 static struct bw_meter *bw_meter_timers[BW_METER_BUCKETS]; 146 static struct callout_handle bw_meter_ch; 147 #define BW_METER_PERIOD (hz) /* periodical handling of bw meters */ 148 149 /* 150 * Pending upcalls are stored in a vector which is flushed when 151 * full, or periodically 152 */ 153 static struct bw_upcall bw_upcalls[BW_UPCALLS_MAX]; 154 static u_int bw_upcalls_n; /* # of pending upcalls */ 155 static struct callout_handle bw_upcalls_ch; 156 #define BW_UPCALLS_PERIOD (hz) /* periodical flush of bw upcalls */ 157 158 #ifdef PIM 159 static struct pimstat pimstat; 160 SYSCTL_STRUCT(_net_inet_pim, PIMCTL_STATS, stats, CTLFLAG_RD, 161 &pimstat, pimstat, 162 "PIM Statistics (struct pimstat, netinet/pim_var.h)"); 163 164 /* 165 * Note: the PIM Register encapsulation adds the following in front of a 166 * data packet: 167 * 168 * struct pim_encap_hdr { 169 * struct ip ip; 170 * struct pim_encap_pimhdr pim; 171 * } 172 * 173 */ 174 175 struct pim_encap_pimhdr { 176 struct pim pim; 177 uint32_t flags; 178 }; 179 180 static struct ip pim_encap_iphdr = { 181 #if BYTE_ORDER == LITTLE_ENDIAN 182 sizeof(struct ip) >> 2, 183 IPVERSION, 184 #else 185 IPVERSION, 186 sizeof(struct ip) >> 2, 187 #endif 188 0, /* tos */ 189 sizeof(struct ip), /* total length */ 190 0, /* id */ 191 0, /* frag offset */ 192 ENCAP_TTL, 193 IPPROTO_PIM, 194 0, /* checksum */ 195 }; 196 197 static struct pim_encap_pimhdr pim_encap_pimhdr = { 198 { 199 PIM_MAKE_VT(PIM_VERSION, PIM_REGISTER), /* PIM vers and message type */ 200 0, /* reserved */ 201 0, /* checksum */ 202 }, 203 0 /* flags */ 204 }; 205 206 static struct ifnet multicast_register_if; 207 static vifi_t reg_vif_num = VIFI_INVALID; 208 #endif /* PIM */ 209 210 /* 211 * Private variables. 212 */ 213 static vifi_t numvifs; 214 static int have_encap_tunnel; 215 216 /* 217 * one-back cache used by ipip_input to locate a tunnel's vif 218 * given a datagram's src ip address. 219 */ 220 static u_long last_encap_src; 221 static struct vif *last_encap_vif; 222 223 static u_long X_ip_mcast_src(int vifi); 224 static int X_ip_mforward(struct ip *ip, struct ifnet *ifp, 225 struct mbuf *m, struct ip_moptions *imo); 226 static int X_ip_mrouter_done(void); 227 static int X_ip_mrouter_get(struct socket *so, struct sockopt *m); 228 static int X_ip_mrouter_set(struct socket *so, struct sockopt *m); 229 static int X_legal_vif_num(int vif); 230 static int X_mrt_ioctl(int cmd, caddr_t data); 231 232 static int get_sg_cnt(struct sioc_sg_req *); 233 static int get_vif_cnt(struct sioc_vif_req *); 234 static int ip_mrouter_init(struct socket *, int); 235 static int add_vif(struct vifctl *); 236 static int del_vif(vifi_t); 237 static int add_mfc(struct mfcctl2 *); 238 static int del_mfc(struct mfcctl2 *); 239 static int set_api_config(uint32_t *); /* chose API capabilities */ 240 static int socket_send(struct socket *, struct mbuf *, struct sockaddr_in *); 241 static int set_assert(int); 242 static void expire_upcalls(void *); 243 static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *, vifi_t); 244 static void phyint_send(struct ip *, struct vif *, struct mbuf *); 245 static void encap_send(struct ip *, struct vif *, struct mbuf *); 246 static void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long); 247 static void tbf_queue(struct vif *, struct mbuf *); 248 static void tbf_process_q(struct vif *); 249 static void tbf_reprocess_q(void *); 250 static int tbf_dq_sel(struct vif *, struct ip *); 251 static void tbf_send_packet(struct vif *, struct mbuf *); 252 static void tbf_update_tokens(struct vif *); 253 static int priority(struct vif *, struct ip *); 254 255 /* 256 * Bandwidth monitoring 257 */ 258 static void free_bw_list(struct bw_meter *list); 259 static int add_bw_upcall(struct bw_upcall *); 260 static int del_bw_upcall(struct bw_upcall *); 261 static void bw_meter_receive_packet(struct bw_meter *x, int plen, 262 struct timeval *nowp); 263 static void bw_meter_prepare_upcall(struct bw_meter *x, struct timeval *nowp); 264 static void bw_upcalls_send(void); 265 static void schedule_bw_meter(struct bw_meter *x, struct timeval *nowp); 266 static void unschedule_bw_meter(struct bw_meter *x); 267 static void bw_meter_process(void); 268 static void expire_bw_upcalls_send(void *); 269 static void expire_bw_meter_process(void *); 270 271 #ifdef PIM 272 static int pim_register_send(struct ip *, struct vif *, 273 struct mbuf *, struct mfc *); 274 static int pim_register_send_rp(struct ip *, struct vif *, 275 struct mbuf *, struct mfc *); 276 static int pim_register_send_upcall(struct ip *, struct vif *, 277 struct mbuf *, struct mfc *); 278 static struct mbuf *pim_register_prepare(struct ip *, struct mbuf *); 279 #endif 280 281 /* 282 * whether or not special PIM assert processing is enabled. 283 */ 284 static int pim_assert; 285 /* 286 * Rate limit for assert notification messages, in usec 287 */ 288 #define ASSERT_MSG_TIME 3000000 289 290 /* 291 * Kernel multicast routing API capabilities and setup. 292 * If more API capabilities are added to the kernel, they should be 293 * recorded in `mrt_api_support'. 294 */ 295 static const uint32_t mrt_api_support = (MRT_MFC_FLAGS_DISABLE_WRONGVIF | 296 MRT_MFC_FLAGS_BORDER_VIF | 297 MRT_MFC_RP | 298 MRT_MFC_BW_UPCALL); 299 static uint32_t mrt_api_config = 0; 300 301 /* 302 * Hash function for a source, group entry 303 */ 304 #define MFCHASH(a, g) MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \ 305 ((g) >> 20) ^ ((g) >> 10) ^ (g)) 306 307 /* 308 * Find a route for a given origin IP address and Multicast group address 309 * Type of service parameter to be added in the future!!! 310 * Statistics are updated by the caller if needed 311 * (mrtstat.mrts_mfc_lookups and mrtstat.mrts_mfc_misses) 312 */ 313 static struct mfc * 314 mfc_find(in_addr_t o, in_addr_t g) 315 { 316 struct mfc *rt; 317 318 for (rt = mfctable[MFCHASH(o,g)]; rt; rt = rt->mfc_next) 319 if ((rt->mfc_origin.s_addr == o) && 320 (rt->mfc_mcastgrp.s_addr == g) && (rt->mfc_stall == NULL)) 321 break; 322 return rt; 323 } 324 325 /* 326 * Macros to compute elapsed time efficiently 327 * Borrowed from Van Jacobson's scheduling code 328 */ 329 #define TV_DELTA(a, b, delta) { \ 330 int xxs; \ 331 delta = (a).tv_usec - (b).tv_usec; \ 332 if ((xxs = (a).tv_sec - (b).tv_sec)) { \ 333 switch (xxs) { \ 334 case 2: \ 335 delta += 1000000; \ 336 /* FALLTHROUGH */ \ 337 case 1: \ 338 delta += 1000000; \ 339 break; \ 340 default: \ 341 delta += (1000000 * xxs); \ 342 } \ 343 } \ 344 } 345 346 #define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \ 347 (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec) 348 349 /* 350 * Handle MRT setsockopt commands to modify the multicast routing tables. 351 */ 352 static int 353 X_ip_mrouter_set(struct socket *so, struct sockopt *sopt) 354 { 355 int error, optval; 356 vifi_t vifi; 357 struct vifctl vifc; 358 struct mfcctl2 mfc; 359 struct bw_upcall bw_upcall; 360 uint32_t i; 361 362 if (so != ip_mrouter && sopt->sopt_name != MRT_INIT) 363 return EPERM; 364 365 error = 0; 366 switch (sopt->sopt_name) { 367 case MRT_INIT: 368 error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); 369 if (error) 370 break; 371 error = ip_mrouter_init(so, optval); 372 break; 373 374 case MRT_DONE: 375 error = ip_mrouter_done(); 376 break; 377 378 case MRT_ADD_VIF: 379 error = sooptcopyin(sopt, &vifc, sizeof vifc, sizeof vifc); 380 if (error) 381 break; 382 error = add_vif(&vifc); 383 break; 384 385 case MRT_DEL_VIF: 386 error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi); 387 if (error) 388 break; 389 error = del_vif(vifi); 390 break; 391 392 case MRT_ADD_MFC: 393 case MRT_DEL_MFC: 394 /* 395 * select data size depending on API version. 396 */ 397 if (sopt->sopt_name == MRT_ADD_MFC && 398 mrt_api_config & MRT_API_FLAGS_ALL) { 399 error = sooptcopyin(sopt, &mfc, sizeof(struct mfcctl2), 400 sizeof(struct mfcctl2)); 401 } else { 402 error = sooptcopyin(sopt, &mfc, sizeof(struct mfcctl), 403 sizeof(struct mfcctl)); 404 bzero((caddr_t)&mfc + sizeof(struct mfcctl), 405 sizeof(mfc) - sizeof(struct mfcctl)); 406 } 407 if (error) 408 break; 409 if (sopt->sopt_name == MRT_ADD_MFC) 410 error = add_mfc(&mfc); 411 else 412 error = del_mfc(&mfc); 413 break; 414 415 case MRT_ASSERT: 416 error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); 417 if (error) 418 break; 419 set_assert(optval); 420 break; 421 422 case MRT_API_CONFIG: 423 error = sooptcopyin(sopt, &i, sizeof i, sizeof i); 424 if (!error) 425 error = set_api_config(&i); 426 if (!error) 427 error = sooptcopyout(sopt, &i, sizeof i); 428 break; 429 430 case MRT_ADD_BW_UPCALL: 431 case MRT_DEL_BW_UPCALL: 432 error = sooptcopyin(sopt, &bw_upcall, sizeof bw_upcall, 433 sizeof bw_upcall); 434 if (error) 435 break; 436 if (sopt->sopt_name == MRT_ADD_BW_UPCALL) 437 error = add_bw_upcall(&bw_upcall); 438 else 439 error = del_bw_upcall(&bw_upcall); 440 break; 441 442 default: 443 error = EOPNOTSUPP; 444 break; 445 } 446 return error; 447 } 448 449 /* 450 * Handle MRT getsockopt commands 451 */ 452 static int 453 X_ip_mrouter_get(struct socket *so, struct sockopt *sopt) 454 { 455 int error; 456 static int version = 0x0305; /* !!! why is this here? XXX */ 457 458 switch (sopt->sopt_name) { 459 case MRT_VERSION: 460 error = sooptcopyout(sopt, &version, sizeof version); 461 break; 462 463 case MRT_ASSERT: 464 error = sooptcopyout(sopt, &pim_assert, sizeof pim_assert); 465 break; 466 467 case MRT_API_SUPPORT: 468 error = sooptcopyout(sopt, &mrt_api_support, sizeof mrt_api_support); 469 break; 470 471 case MRT_API_CONFIG: 472 error = sooptcopyout(sopt, &mrt_api_config, sizeof mrt_api_config); 473 break; 474 475 default: 476 error = EOPNOTSUPP; 477 break; 478 } 479 return error; 480 } 481 482 /* 483 * Handle ioctl commands to obtain information from the cache 484 */ 485 static int 486 X_mrt_ioctl(int cmd, caddr_t data) 487 { 488 int error = 0; 489 490 switch (cmd) { 491 case SIOCGETVIFCNT: 492 error = get_vif_cnt((struct sioc_vif_req *)data); 493 break; 494 495 case SIOCGETSGCNT: 496 error = get_sg_cnt((struct sioc_sg_req *)data); 497 break; 498 499 default: 500 error = EINVAL; 501 break; 502 } 503 return error; 504 } 505 506 /* 507 * returns the packet, byte, rpf-failure count for the source group provided 508 */ 509 static int 510 get_sg_cnt(struct sioc_sg_req *req) 511 { 512 int s; 513 struct mfc *rt; 514 515 s = splnet(); 516 rt = mfc_find(req->src.s_addr, req->grp.s_addr); 517 splx(s); 518 if (rt == NULL) { 519 req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff; 520 return EADDRNOTAVAIL; 521 } 522 req->pktcnt = rt->mfc_pkt_cnt; 523 req->bytecnt = rt->mfc_byte_cnt; 524 req->wrong_if = rt->mfc_wrong_if; 525 return 0; 526 } 527 528 /* 529 * returns the input and output packet and byte counts on the vif provided 530 */ 531 static int 532 get_vif_cnt(struct sioc_vif_req *req) 533 { 534 vifi_t vifi = req->vifi; 535 536 if (vifi >= numvifs) 537 return EINVAL; 538 539 req->icount = viftable[vifi].v_pkt_in; 540 req->ocount = viftable[vifi].v_pkt_out; 541 req->ibytes = viftable[vifi].v_bytes_in; 542 req->obytes = viftable[vifi].v_bytes_out; 543 544 return 0; 545 } 546 547 /* 548 * Enable multicast routing 549 */ 550 static int 551 ip_mrouter_init(struct socket *so, int version) 552 { 553 if (mrtdebug) 554 log(LOG_DEBUG, "ip_mrouter_init: so_type = %d, pr_protocol = %d\n", 555 so->so_type, so->so_proto->pr_protocol); 556 557 if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_IGMP) 558 return EOPNOTSUPP; 559 560 if (version != 1) 561 return ENOPROTOOPT; 562 563 if (ip_mrouter != NULL) 564 return EADDRINUSE; 565 566 ip_mrouter = so; 567 568 bzero((caddr_t)mfctable, sizeof(mfctable)); 569 bzero((caddr_t)nexpire, sizeof(nexpire)); 570 571 pim_assert = 0; 572 573 expire_upcalls_ch = timeout(expire_upcalls, NULL, EXPIRE_TIMEOUT); 574 575 bw_upcalls_n = 0; 576 bzero((caddr_t)bw_meter_timers, sizeof(bw_meter_timers)); 577 bw_upcalls_ch = timeout(expire_bw_upcalls_send, NULL, BW_UPCALLS_PERIOD); 578 bw_meter_ch = timeout(expire_bw_meter_process, NULL, BW_METER_PERIOD); 579 580 mrt_api_config = 0; 581 582 if (mrtdebug) 583 log(LOG_DEBUG, "ip_mrouter_init\n"); 584 585 return 0; 586 } 587 588 /* 589 * Disable multicast routing 590 */ 591 static int 592 X_ip_mrouter_done(void) 593 { 594 vifi_t vifi; 595 int i; 596 struct ifnet *ifp; 597 struct ifreq ifr; 598 struct mfc *rt; 599 struct rtdetq *rte; 600 int s; 601 602 s = splnet(); 603 604 /* 605 * For each phyint in use, disable promiscuous reception of all IP 606 * multicasts. 607 */ 608 for (vifi = 0; vifi < numvifs; vifi++) { 609 if (viftable[vifi].v_lcl_addr.s_addr != 0 && 610 !(viftable[vifi].v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) { 611 struct sockaddr_in *so = (struct sockaddr_in *)&(ifr.ifr_addr); 612 613 so->sin_len = sizeof(struct sockaddr_in); 614 so->sin_family = AF_INET; 615 so->sin_addr.s_addr = INADDR_ANY; 616 ifp = viftable[vifi].v_ifp; 617 if_allmulti(ifp, 0); 618 } 619 } 620 bzero((caddr_t)tbftable, sizeof(tbftable)); 621 bzero((caddr_t)viftable, sizeof(viftable)); 622 numvifs = 0; 623 pim_assert = 0; 624 625 untimeout(expire_upcalls, NULL, expire_upcalls_ch); 626 627 mrt_api_config = 0; 628 bw_upcalls_n = 0; 629 untimeout(expire_bw_upcalls_send, NULL, bw_upcalls_ch); 630 untimeout(expire_bw_meter_process, NULL, bw_meter_ch); 631 632 /* 633 * Free all multicast forwarding cache entries. 634 */ 635 for (i = 0; i < MFCTBLSIZ; i++) { 636 for (rt = mfctable[i]; rt != NULL; ) { 637 struct mfc *nr = rt->mfc_next; 638 639 for (rte = rt->mfc_stall; rte != NULL; ) { 640 struct rtdetq *n = rte->next; 641 642 m_freem(rte->m); 643 free(rte, M_MRTABLE); 644 rte = n; 645 } 646 free_bw_list(rt->mfc_bw_meter); 647 free(rt, M_MRTABLE); 648 rt = nr; 649 } 650 } 651 652 bzero((caddr_t)mfctable, sizeof(mfctable)); 653 654 bzero(bw_meter_timers, sizeof(bw_meter_timers)); 655 656 /* 657 * Reset de-encapsulation cache 658 */ 659 last_encap_src = INADDR_ANY; 660 last_encap_vif = NULL; 661 #ifdef PIM 662 reg_vif_num = VIFI_INVALID; 663 #endif 664 have_encap_tunnel = 0; 665 666 ip_mrouter = NULL; 667 668 splx(s); 669 670 if (mrtdebug) 671 log(LOG_DEBUG, "ip_mrouter_done\n"); 672 673 return 0; 674 } 675 676 /* 677 * Set PIM assert processing global 678 */ 679 static int 680 set_assert(int i) 681 { 682 if ((i != 1) && (i != 0)) 683 return EINVAL; 684 685 pim_assert = i; 686 687 return 0; 688 } 689 690 /* 691 * Configure API capabilities 692 */ 693 int 694 set_api_config(uint32_t *apival) 695 { 696 int i; 697 698 /* 699 * We can set the API capabilities only if it is the first operation 700 * after MRT_INIT. I.e.: 701 * - there are no vifs installed 702 * - pim_assert is not enabled 703 * - the MFC table is empty 704 */ 705 if (numvifs > 0) { 706 *apival = 0; 707 return EPERM; 708 } 709 if (pim_assert) { 710 *apival = 0; 711 return EPERM; 712 } 713 for (i = 0; i < MFCTBLSIZ; i++) { 714 if (mfctable[i] != NULL) { 715 *apival = 0; 716 return EPERM; 717 } 718 } 719 720 mrt_api_config = *apival & mrt_api_support; 721 *apival = mrt_api_config; 722 723 return 0; 724 } 725 726 /* 727 * Add a vif to the vif table 728 */ 729 static int 730 add_vif(struct vifctl *vifcp) 731 { 732 struct vif *vifp = viftable + vifcp->vifc_vifi; 733 struct sockaddr_in sin = {sizeof sin, AF_INET}; 734 struct ifaddr *ifa; 735 struct ifnet *ifp; 736 int error, s; 737 struct tbf *v_tbf = tbftable + vifcp->vifc_vifi; 738 739 if (vifcp->vifc_vifi >= MAXVIFS) 740 return EINVAL; 741 if (vifp->v_lcl_addr.s_addr != INADDR_ANY) 742 return EADDRINUSE; 743 if (vifcp->vifc_lcl_addr.s_addr == INADDR_ANY) 744 return EADDRNOTAVAIL; 745 746 /* Find the interface with an address in AF_INET family */ 747 #ifdef PIM 748 if (vifcp->vifc_flags & VIFF_REGISTER) { 749 /* 750 * XXX: Because VIFF_REGISTER does not really need a valid 751 * local interface (e.g. it could be 127.0.0.2), we don't 752 * check its address. 753 */ 754 ifp = NULL; 755 } else 756 #endif 757 { 758 sin.sin_addr = vifcp->vifc_lcl_addr; 759 ifa = ifa_ifwithaddr((struct sockaddr *)&sin); 760 if (ifa == NULL) 761 return EADDRNOTAVAIL; 762 ifp = ifa->ifa_ifp; 763 } 764 765 if (vifcp->vifc_flags & VIFF_TUNNEL) { 766 if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) { 767 /* 768 * An encapsulating tunnel is wanted. Tell ipip_input() to 769 * start paying attention to encapsulated packets. 770 */ 771 if (have_encap_tunnel == 0) { 772 have_encap_tunnel = 1; 773 for (s = 0; s < MAXVIFS; ++s) { 774 multicast_decap_if[s].if_name = "mdecap"; 775 multicast_decap_if[s].if_unit = s; 776 } 777 } 778 /* 779 * Set interface to fake encapsulator interface 780 */ 781 ifp = &multicast_decap_if[vifcp->vifc_vifi]; 782 /* 783 * Prepare cached route entry 784 */ 785 bzero(&vifp->v_route, sizeof(vifp->v_route)); 786 } else { 787 log(LOG_ERR, "source routed tunnels not supported\n"); 788 return EOPNOTSUPP; 789 } 790 #ifdef PIM 791 } else if (vifcp->vifc_flags & VIFF_REGISTER) { 792 ifp = &multicast_register_if; 793 if (mrtdebug) 794 log(LOG_DEBUG, "Adding a register vif, ifp: %p\n", 795 (void *)&multicast_register_if); 796 if (reg_vif_num == VIFI_INVALID) { 797 multicast_register_if.if_name = "register_vif"; 798 multicast_register_if.if_unit = 0; 799 multicast_register_if.if_flags = IFF_LOOPBACK; 800 bzero(&vifp->v_route, sizeof(vifp->v_route)); 801 reg_vif_num = vifcp->vifc_vifi; 802 } 803 #endif 804 } else { /* Make sure the interface supports multicast */ 805 if ((ifp->if_flags & IFF_MULTICAST) == 0) 806 return EOPNOTSUPP; 807 808 /* Enable promiscuous reception of all IP multicasts from the if */ 809 s = splnet(); 810 error = if_allmulti(ifp, 1); 811 splx(s); 812 if (error) 813 return error; 814 } 815 816 s = splnet(); 817 /* define parameters for the tbf structure */ 818 vifp->v_tbf = v_tbf; 819 GET_TIME(vifp->v_tbf->tbf_last_pkt_t); 820 vifp->v_tbf->tbf_n_tok = 0; 821 vifp->v_tbf->tbf_q_len = 0; 822 vifp->v_tbf->tbf_max_q_len = MAXQSIZE; 823 vifp->v_tbf->tbf_q = vifp->v_tbf->tbf_t = NULL; 824 825 vifp->v_flags = vifcp->vifc_flags; 826 vifp->v_threshold = vifcp->vifc_threshold; 827 vifp->v_lcl_addr = vifcp->vifc_lcl_addr; 828 vifp->v_rmt_addr = vifcp->vifc_rmt_addr; 829 vifp->v_ifp = ifp; 830 /* scaling up here allows division by 1024 in critical code */ 831 vifp->v_rate_limit= vifcp->vifc_rate_limit * 1024 / 1000; 832 vifp->v_rsvp_on = 0; 833 vifp->v_rsvpd = NULL; 834 /* initialize per vif pkt counters */ 835 vifp->v_pkt_in = 0; 836 vifp->v_pkt_out = 0; 837 vifp->v_bytes_in = 0; 838 vifp->v_bytes_out = 0; 839 splx(s); 840 841 /* Adjust numvifs up if the vifi is higher than numvifs */ 842 if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1; 843 844 if (mrtdebug) 845 log(LOG_DEBUG, "add_vif #%d, lcladdr %lx, %s %lx, thresh %x, rate %d\n", 846 vifcp->vifc_vifi, 847 (u_long)ntohl(vifcp->vifc_lcl_addr.s_addr), 848 (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask", 849 (u_long)ntohl(vifcp->vifc_rmt_addr.s_addr), 850 vifcp->vifc_threshold, 851 vifcp->vifc_rate_limit); 852 853 return 0; 854 } 855 856 /* 857 * Delete a vif from the vif table 858 */ 859 static int 860 del_vif(vifi_t vifi) 861 { 862 struct vif *vifp; 863 int s; 864 865 if (vifi >= numvifs) 866 return EINVAL; 867 vifp = &viftable[vifi]; 868 if (vifp->v_lcl_addr.s_addr == INADDR_ANY) 869 return EADDRNOTAVAIL; 870 871 s = splnet(); 872 873 if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) 874 if_allmulti(vifp->v_ifp, 0); 875 876 if (vifp == last_encap_vif) { 877 last_encap_vif = NULL; 878 last_encap_src = INADDR_ANY; 879 } 880 881 /* 882 * Free packets queued at the interface 883 */ 884 while (vifp->v_tbf->tbf_q) { 885 struct mbuf *m = vifp->v_tbf->tbf_q; 886 887 vifp->v_tbf->tbf_q = m->m_act; 888 m_freem(m); 889 } 890 891 #ifdef PIM 892 if (vifp->v_flags & VIFF_REGISTER) 893 reg_vif_num = VIFI_INVALID; 894 #endif 895 896 bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf))); 897 bzero((caddr_t)vifp, sizeof (*vifp)); 898 899 if (mrtdebug) 900 log(LOG_DEBUG, "del_vif %d, numvifs %d\n", vifi, numvifs); 901 902 /* Adjust numvifs down */ 903 for (vifi = numvifs; vifi > 0; vifi--) 904 if (viftable[vifi-1].v_lcl_addr.s_addr != INADDR_ANY) 905 break; 906 numvifs = vifi; 907 908 splx(s); 909 910 return 0; 911 } 912 913 /* 914 * update an mfc entry without resetting counters and S,G addresses. 915 */ 916 static void 917 update_mfc_params(struct mfc *rt, struct mfcctl2 *mfccp) 918 { 919 int i; 920 921 rt->mfc_parent = mfccp->mfcc_parent; 922 for (i = 0; i < numvifs; i++) { 923 rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; 924 rt->mfc_flags[i] = mfccp->mfcc_flags[i] & mrt_api_config & 925 MRT_MFC_FLAGS_ALL; 926 } 927 /* set the RP address */ 928 if (mrt_api_config & MRT_MFC_RP) 929 rt->mfc_rp = mfccp->mfcc_rp; 930 else 931 rt->mfc_rp.s_addr = INADDR_ANY; 932 } 933 934 /* 935 * fully initialize an mfc entry from the parameter. 936 */ 937 static void 938 init_mfc_params(struct mfc *rt, struct mfcctl2 *mfccp) 939 { 940 rt->mfc_origin = mfccp->mfcc_origin; 941 rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; 942 943 update_mfc_params(rt, mfccp); 944 945 /* initialize pkt counters per src-grp */ 946 rt->mfc_pkt_cnt = 0; 947 rt->mfc_byte_cnt = 0; 948 rt->mfc_wrong_if = 0; 949 rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0; 950 } 951 952 953 /* 954 * Add an mfc entry 955 */ 956 static int 957 add_mfc(struct mfcctl2 *mfccp) 958 { 959 struct mfc *rt; 960 u_long hash; 961 struct rtdetq *rte; 962 u_short nstl; 963 int s; 964 965 rt = mfc_find(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); 966 967 /* If an entry already exists, just update the fields */ 968 if (rt) { 969 if (mrtdebug & DEBUG_MFC) 970 log(LOG_DEBUG,"add_mfc update o %lx g %lx p %x\n", 971 (u_long)ntohl(mfccp->mfcc_origin.s_addr), 972 (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), 973 mfccp->mfcc_parent); 974 975 s = splnet(); 976 update_mfc_params(rt, mfccp); 977 splx(s); 978 return 0; 979 } 980 981 /* 982 * Find the entry for which the upcall was made and update 983 */ 984 s = splnet(); 985 hash = MFCHASH(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); 986 for (rt = mfctable[hash], nstl = 0; rt; rt = rt->mfc_next) { 987 988 if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) && 989 (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) && 990 (rt->mfc_stall != NULL)) { 991 992 if (nstl++) 993 log(LOG_ERR, "add_mfc %s o %lx g %lx p %x dbx %p\n", 994 "multiple kernel entries", 995 (u_long)ntohl(mfccp->mfcc_origin.s_addr), 996 (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), 997 mfccp->mfcc_parent, (void *)rt->mfc_stall); 998 999 if (mrtdebug & DEBUG_MFC) 1000 log(LOG_DEBUG,"add_mfc o %lx g %lx p %x dbg %p\n", 1001 (u_long)ntohl(mfccp->mfcc_origin.s_addr), 1002 (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), 1003 mfccp->mfcc_parent, (void *)rt->mfc_stall); 1004 1005 init_mfc_params(rt, mfccp); 1006 1007 rt->mfc_expire = 0; /* Don't clean this guy up */ 1008 nexpire[hash]--; 1009 1010 /* free packets Qed at the end of this entry */ 1011 for (rte = rt->mfc_stall; rte != NULL; ) { 1012 struct rtdetq *n = rte->next; 1013 1014 ip_mdq(rte->m, rte->ifp, rt, -1); 1015 m_freem(rte->m); 1016 free(rte, M_MRTABLE); 1017 rte = n; 1018 } 1019 rt->mfc_stall = NULL; 1020 } 1021 } 1022 1023 /* 1024 * It is possible that an entry is being inserted without an upcall 1025 */ 1026 if (nstl == 0) { 1027 if (mrtdebug & DEBUG_MFC) 1028 log(LOG_DEBUG,"add_mfc no upcall h %lu o %lx g %lx p %x\n", 1029 hash, (u_long)ntohl(mfccp->mfcc_origin.s_addr), 1030 (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), 1031 mfccp->mfcc_parent); 1032 1033 for (rt = mfctable[hash]; rt != NULL; rt = rt->mfc_next) { 1034 if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) && 1035 (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) { 1036 init_mfc_params(rt, mfccp); 1037 if (rt->mfc_expire) 1038 nexpire[hash]--; 1039 rt->mfc_expire = 0; 1040 break; /* XXX */ 1041 } 1042 } 1043 if (rt == NULL) { /* no upcall, so make a new entry */ 1044 rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT); 1045 if (rt == NULL) { 1046 splx(s); 1047 return ENOBUFS; 1048 } 1049 1050 init_mfc_params(rt, mfccp); 1051 rt->mfc_expire = 0; 1052 rt->mfc_stall = NULL; 1053 1054 rt->mfc_bw_meter = NULL; 1055 /* insert new entry at head of hash chain */ 1056 rt->mfc_next = mfctable[hash]; 1057 mfctable[hash] = rt; 1058 } 1059 } 1060 splx(s); 1061 return 0; 1062 } 1063 1064 /* 1065 * Delete an mfc entry 1066 */ 1067 static int 1068 del_mfc(struct mfcctl2 *mfccp) 1069 { 1070 struct in_addr origin; 1071 struct in_addr mcastgrp; 1072 struct mfc *rt; 1073 struct mfc **nptr; 1074 u_long hash; 1075 int s; 1076 struct bw_meter *list; 1077 1078 origin = mfccp->mfcc_origin; 1079 mcastgrp = mfccp->mfcc_mcastgrp; 1080 1081 if (mrtdebug & DEBUG_MFC) 1082 log(LOG_DEBUG,"del_mfc orig %lx mcastgrp %lx\n", 1083 (u_long)ntohl(origin.s_addr), (u_long)ntohl(mcastgrp.s_addr)); 1084 1085 s = splnet(); 1086 1087 hash = MFCHASH(origin.s_addr, mcastgrp.s_addr); 1088 for (nptr = &mfctable[hash]; (rt = *nptr) != NULL; nptr = &rt->mfc_next) 1089 if (origin.s_addr == rt->mfc_origin.s_addr && 1090 mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr && 1091 rt->mfc_stall == NULL) 1092 break; 1093 if (rt == NULL) { 1094 splx(s); 1095 return EADDRNOTAVAIL; 1096 } 1097 1098 *nptr = rt->mfc_next; 1099 1100 /* 1101 * free the bw_meter entries 1102 */ 1103 list = rt->mfc_bw_meter; 1104 rt->mfc_bw_meter = NULL; 1105 1106 free(rt, M_MRTABLE); 1107 1108 splx(s); 1109 1110 free_bw_list(list); 1111 1112 return 0; 1113 } 1114 1115 /* 1116 * Send a message to mrouted on the multicast routing socket 1117 */ 1118 static int 1119 socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src) 1120 { 1121 if (s) { 1122 if (sbappendaddr(&s->so_rcv, (struct sockaddr *)src, mm, NULL) != 0) { 1123 sorwakeup(s); 1124 return 0; 1125 } 1126 } 1127 m_freem(mm); 1128 return -1; 1129 } 1130 1131 /* 1132 * IP multicast forwarding function. This function assumes that the packet 1133 * pointed to by "ip" has arrived on (or is about to be sent to) the interface 1134 * pointed to by "ifp", and the packet is to be relayed to other networks 1135 * that have members of the packet's destination IP multicast group. 1136 * 1137 * The packet is returned unscathed to the caller, unless it is 1138 * erroneous, in which case a non-zero return value tells the caller to 1139 * discard it. 1140 */ 1141 1142 #define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */ 1143 1144 static int 1145 X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, 1146 struct ip_moptions *imo) 1147 { 1148 struct mfc *rt; 1149 int s; 1150 vifi_t vifi; 1151 1152 if (mrtdebug & DEBUG_FORWARD) 1153 log(LOG_DEBUG, "ip_mforward: src %lx, dst %lx, ifp %p\n", 1154 (u_long)ntohl(ip->ip_src.s_addr), (u_long)ntohl(ip->ip_dst.s_addr), 1155 (void *)ifp); 1156 1157 if (ip->ip_hl < (sizeof(struct ip) + TUNNEL_LEN) >> 2 || 1158 ((u_char *)(ip + 1))[1] != IPOPT_LSRR ) { 1159 /* 1160 * Packet arrived via a physical interface or 1161 * an encapsulated tunnel or a register_vif. 1162 */ 1163 } else { 1164 /* 1165 * Packet arrived through a source-route tunnel. 1166 * Source-route tunnels are no longer supported. 1167 */ 1168 static int last_log; 1169 if (last_log != time_second) { 1170 last_log = time_second; 1171 log(LOG_ERR, 1172 "ip_mforward: received source-routed packet from %lx\n", 1173 (u_long)ntohl(ip->ip_src.s_addr)); 1174 } 1175 return 1; 1176 } 1177 1178 if (imo && ((vifi = imo->imo_multicast_vif) < numvifs)) { 1179 if (ip->ip_ttl < 255) 1180 ip->ip_ttl++; /* compensate for -1 in *_send routines */ 1181 if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) { 1182 struct vif *vifp = viftable + vifi; 1183 1184 printf("Sending IPPROTO_RSVP from %lx to %lx on vif %d (%s%s%d)\n", 1185 (long)ntohl(ip->ip_src.s_addr), (long)ntohl(ip->ip_dst.s_addr), 1186 vifi, 1187 (vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "", 1188 vifp->v_ifp->if_name, vifp->v_ifp->if_unit); 1189 } 1190 return ip_mdq(m, ifp, NULL, vifi); 1191 } 1192 if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) { 1193 printf("Warning: IPPROTO_RSVP from %lx to %lx without vif option\n", 1194 (long)ntohl(ip->ip_src.s_addr), (long)ntohl(ip->ip_dst.s_addr)); 1195 if (!imo) 1196 printf("In fact, no options were specified at all\n"); 1197 } 1198 1199 /* 1200 * Don't forward a packet with time-to-live of zero or one, 1201 * or a packet destined to a local-only group. 1202 */ 1203 if (ip->ip_ttl <= 1 || ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP) 1204 return 0; 1205 1206 /* 1207 * Determine forwarding vifs from the forwarding cache table 1208 */ 1209 s = splnet(); 1210 ++mrtstat.mrts_mfc_lookups; 1211 rt = mfc_find(ip->ip_src.s_addr, ip->ip_dst.s_addr); 1212 1213 /* Entry exists, so forward if necessary */ 1214 if (rt != NULL) { 1215 splx(s); 1216 return ip_mdq(m, ifp, rt, -1); 1217 } else { 1218 /* 1219 * If we don't have a route for packet's origin, 1220 * Make a copy of the packet & send message to routing daemon 1221 */ 1222 1223 struct mbuf *mb0; 1224 struct rtdetq *rte; 1225 u_long hash; 1226 int hlen = ip->ip_hl << 2; 1227 1228 ++mrtstat.mrts_mfc_misses; 1229 1230 mrtstat.mrts_no_route++; 1231 if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC)) 1232 log(LOG_DEBUG, "ip_mforward: no rte s %lx g %lx\n", 1233 (u_long)ntohl(ip->ip_src.s_addr), 1234 (u_long)ntohl(ip->ip_dst.s_addr)); 1235 1236 /* 1237 * Allocate mbufs early so that we don't do extra work if we are 1238 * just going to fail anyway. Make sure to pullup the header so 1239 * that other people can't step on it. 1240 */ 1241 rte = (struct rtdetq *)malloc((sizeof *rte), M_MRTABLE, M_NOWAIT); 1242 if (rte == NULL) { 1243 splx(s); 1244 return ENOBUFS; 1245 } 1246 mb0 = m_copypacket(m, M_DONTWAIT); 1247 if (mb0 && (M_HASCL(mb0) || mb0->m_len < hlen)) 1248 mb0 = m_pullup(mb0, hlen); 1249 if (mb0 == NULL) { 1250 free(rte, M_MRTABLE); 1251 splx(s); 1252 return ENOBUFS; 1253 } 1254 1255 /* is there an upcall waiting for this flow ? */ 1256 hash = MFCHASH(ip->ip_src.s_addr, ip->ip_dst.s_addr); 1257 for (rt = mfctable[hash]; rt; rt = rt->mfc_next) { 1258 if ((ip->ip_src.s_addr == rt->mfc_origin.s_addr) && 1259 (ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) && 1260 (rt->mfc_stall != NULL)) 1261 break; 1262 } 1263 1264 if (rt == NULL) { 1265 int i; 1266 struct igmpmsg *im; 1267 struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; 1268 struct mbuf *mm; 1269 1270 /* 1271 * Locate the vifi for the incoming interface for this packet. 1272 * If none found, drop packet. 1273 */ 1274 for (vifi=0; vifi < numvifs && viftable[vifi].v_ifp != ifp; vifi++) 1275 ; 1276 if (vifi >= numvifs) /* vif not found, drop packet */ 1277 goto non_fatal; 1278 1279 /* no upcall, so make a new entry */ 1280 rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT); 1281 if (rt == NULL) 1282 goto fail; 1283 /* Make a copy of the header to send to the user level process */ 1284 mm = m_copy(mb0, 0, hlen); 1285 if (mm == NULL) 1286 goto fail1; 1287 1288 /* 1289 * Send message to routing daemon to install 1290 * a route into the kernel table 1291 */ 1292 1293 im = mtod(mm, struct igmpmsg *); 1294 im->im_msgtype = IGMPMSG_NOCACHE; 1295 im->im_mbz = 0; 1296 im->im_vif = vifi; 1297 1298 mrtstat.mrts_upcalls++; 1299 1300 k_igmpsrc.sin_addr = ip->ip_src; 1301 if (socket_send(ip_mrouter, mm, &k_igmpsrc) < 0) { 1302 log(LOG_WARNING, "ip_mforward: ip_mrouter socket queue full\n"); 1303 ++mrtstat.mrts_upq_sockfull; 1304 fail1: 1305 free(rt, M_MRTABLE); 1306 fail: 1307 free(rte, M_MRTABLE); 1308 m_freem(mb0); 1309 splx(s); 1310 return ENOBUFS; 1311 } 1312 1313 /* insert new entry at head of hash chain */ 1314 rt->mfc_origin.s_addr = ip->ip_src.s_addr; 1315 rt->mfc_mcastgrp.s_addr = ip->ip_dst.s_addr; 1316 rt->mfc_expire = UPCALL_EXPIRE; 1317 nexpire[hash]++; 1318 for (i = 0; i < numvifs; i++) { 1319 rt->mfc_ttls[i] = 0; 1320 rt->mfc_flags[i] = 0; 1321 } 1322 rt->mfc_parent = -1; 1323 1324 rt->mfc_rp.s_addr = INADDR_ANY; /* clear the RP address */ 1325 1326 rt->mfc_bw_meter = NULL; 1327 1328 /* link into table */ 1329 rt->mfc_next = mfctable[hash]; 1330 mfctable[hash] = rt; 1331 rt->mfc_stall = rte; 1332 1333 } else { 1334 /* determine if q has overflowed */ 1335 int npkts = 0; 1336 struct rtdetq **p; 1337 1338 /* 1339 * XXX ouch! we need to append to the list, but we 1340 * only have a pointer to the front, so we have to 1341 * scan the entire list every time. 1342 */ 1343 for (p = &rt->mfc_stall; *p != NULL; p = &(*p)->next) 1344 npkts++; 1345 1346 if (npkts > MAX_UPQ) { 1347 mrtstat.mrts_upq_ovflw++; 1348 non_fatal: 1349 free(rte, M_MRTABLE); 1350 m_freem(mb0); 1351 splx(s); 1352 return 0; 1353 } 1354 1355 /* Add this entry to the end of the queue */ 1356 *p = rte; 1357 } 1358 1359 rte->m = mb0; 1360 rte->ifp = ifp; 1361 rte->next = NULL; 1362 1363 splx(s); 1364 1365 return 0; 1366 } 1367 } 1368 1369 /* 1370 * Clean up the cache entry if upcall is not serviced 1371 */ 1372 static void 1373 expire_upcalls(void *unused) 1374 { 1375 struct rtdetq *rte; 1376 struct mfc *mfc, **nptr; 1377 int i; 1378 int s; 1379 1380 s = splnet(); 1381 for (i = 0; i < MFCTBLSIZ; i++) { 1382 if (nexpire[i] == 0) 1383 continue; 1384 nptr = &mfctable[i]; 1385 for (mfc = *nptr; mfc != NULL; mfc = *nptr) { 1386 /* 1387 * Skip real cache entries 1388 * Make sure it wasn't marked to not expire (shouldn't happen) 1389 * If it expires now 1390 */ 1391 if (mfc->mfc_stall != NULL && mfc->mfc_expire != 0 && 1392 --mfc->mfc_expire == 0) { 1393 if (mrtdebug & DEBUG_EXPIRE) 1394 log(LOG_DEBUG, "expire_upcalls: expiring (%lx %lx)\n", 1395 (u_long)ntohl(mfc->mfc_origin.s_addr), 1396 (u_long)ntohl(mfc->mfc_mcastgrp.s_addr)); 1397 /* 1398 * drop all the packets 1399 * free the mbuf with the pkt, if, timing info 1400 */ 1401 for (rte = mfc->mfc_stall; rte; ) { 1402 struct rtdetq *n = rte->next; 1403 1404 m_freem(rte->m); 1405 free(rte, M_MRTABLE); 1406 rte = n; 1407 } 1408 ++mrtstat.mrts_cache_cleanups; 1409 nexpire[i]--; 1410 1411 /* 1412 * free the bw_meter entries 1413 */ 1414 while (mfc->mfc_bw_meter != NULL) { 1415 struct bw_meter *x = mfc->mfc_bw_meter; 1416 1417 mfc->mfc_bw_meter = x->bm_mfc_next; 1418 free(x, M_BWMETER); 1419 } 1420 1421 *nptr = mfc->mfc_next; 1422 free(mfc, M_MRTABLE); 1423 } else { 1424 nptr = &mfc->mfc_next; 1425 } 1426 } 1427 } 1428 splx(s); 1429 expire_upcalls_ch = timeout(expire_upcalls, NULL, EXPIRE_TIMEOUT); 1430 } 1431 1432 /* 1433 * Packet forwarding routine once entry in the cache is made 1434 */ 1435 static int 1436 ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, vifi_t xmt_vif) 1437 { 1438 struct ip *ip = mtod(m, struct ip *); 1439 vifi_t vifi; 1440 int plen = ip->ip_len; 1441 1442 /* 1443 * Macro to send packet on vif. Since RSVP packets don't get counted on 1444 * input, they shouldn't get counted on output, so statistics keeping is 1445 * separate. 1446 */ 1447 #define MC_SEND(ip,vifp,m) { \ 1448 if ((vifp)->v_flags & VIFF_TUNNEL) \ 1449 encap_send((ip), (vifp), (m)); \ 1450 else \ 1451 phyint_send((ip), (vifp), (m)); \ 1452 } 1453 1454 /* 1455 * If xmt_vif is not -1, send on only the requested vif. 1456 * 1457 * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.) 1458 */ 1459 if (xmt_vif < numvifs) { 1460 #ifdef PIM 1461 if (viftable[xmt_vif].v_flags & VIFF_REGISTER) 1462 pim_register_send(ip, viftable + xmt_vif, m, rt); 1463 else 1464 #endif 1465 MC_SEND(ip, viftable + xmt_vif, m); 1466 return 1; 1467 } 1468 1469 /* 1470 * Don't forward if it didn't arrive from the parent vif for its origin. 1471 */ 1472 vifi = rt->mfc_parent; 1473 if ((vifi >= numvifs) || (viftable[vifi].v_ifp != ifp)) { 1474 /* came in the wrong interface */ 1475 if (mrtdebug & DEBUG_FORWARD) 1476 log(LOG_DEBUG, "wrong if: ifp %p vifi %d vififp %p\n", 1477 (void *)ifp, vifi, (void *)viftable[vifi].v_ifp); 1478 ++mrtstat.mrts_wrong_if; 1479 ++rt->mfc_wrong_if; 1480 /* 1481 * If we are doing PIM assert processing, send a message 1482 * to the routing daemon. 1483 * 1484 * XXX: A PIM-SM router needs the WRONGVIF detection so it 1485 * can complete the SPT switch, regardless of the type 1486 * of the iif (broadcast media, GRE tunnel, etc). 1487 */ 1488 if (pim_assert && (vifi < numvifs) && viftable[vifi].v_ifp) { 1489 struct timeval now; 1490 u_long delta; 1491 1492 #ifdef PIM 1493 if (ifp == &multicast_register_if) 1494 pimstat.pims_rcv_registers_wrongiif++; 1495 #endif 1496 1497 /* Get vifi for the incoming packet */ 1498 for (vifi=0; vifi < numvifs && viftable[vifi].v_ifp != ifp; vifi++) 1499 ; 1500 if (vifi >= numvifs) 1501 return 0; /* The iif is not found: ignore the packet. */ 1502 1503 if (rt->mfc_flags[vifi] & MRT_MFC_FLAGS_DISABLE_WRONGVIF) 1504 return 0; /* WRONGVIF disabled: ignore the packet */ 1505 1506 GET_TIME(now); 1507 1508 TV_DELTA(rt->mfc_last_assert, now, delta); 1509 1510 if (delta > ASSERT_MSG_TIME) { 1511 struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; 1512 struct igmpmsg *im; 1513 int hlen = ip->ip_hl << 2; 1514 struct mbuf *mm = m_copy(m, 0, hlen); 1515 1516 if (mm && (M_HASCL(mm) || mm->m_len < hlen)) 1517 mm = m_pullup(mm, hlen); 1518 if (mm == NULL) 1519 return ENOBUFS; 1520 1521 rt->mfc_last_assert = now; 1522 1523 im = mtod(mm, struct igmpmsg *); 1524 im->im_msgtype = IGMPMSG_WRONGVIF; 1525 im->im_mbz = 0; 1526 im->im_vif = vifi; 1527 1528 mrtstat.mrts_upcalls++; 1529 1530 k_igmpsrc.sin_addr = im->im_src; 1531 if (socket_send(ip_mrouter, mm, &k_igmpsrc) < 0) { 1532 log(LOG_WARNING, 1533 "ip_mforward: ip_mrouter socket queue full\n"); 1534 ++mrtstat.mrts_upq_sockfull; 1535 return ENOBUFS; 1536 } 1537 } 1538 } 1539 return 0; 1540 } 1541 1542 /* If I sourced this packet, it counts as output, else it was input. */ 1543 if (ip->ip_src.s_addr == viftable[vifi].v_lcl_addr.s_addr) { 1544 viftable[vifi].v_pkt_out++; 1545 viftable[vifi].v_bytes_out += plen; 1546 } else { 1547 viftable[vifi].v_pkt_in++; 1548 viftable[vifi].v_bytes_in += plen; 1549 } 1550 rt->mfc_pkt_cnt++; 1551 rt->mfc_byte_cnt += plen; 1552 1553 /* 1554 * For each vif, decide if a copy of the packet should be forwarded. 1555 * Forward if: 1556 * - the ttl exceeds the vif's threshold 1557 * - there are group members downstream on interface 1558 */ 1559 for (vifi = 0; vifi < numvifs; vifi++) 1560 if ((rt->mfc_ttls[vifi] > 0) && (ip->ip_ttl > rt->mfc_ttls[vifi])) { 1561 viftable[vifi].v_pkt_out++; 1562 viftable[vifi].v_bytes_out += plen; 1563 #ifdef PIM 1564 if (viftable[vifi].v_flags & VIFF_REGISTER) 1565 pim_register_send(ip, viftable + vifi, m, rt); 1566 else 1567 #endif 1568 MC_SEND(ip, viftable+vifi, m); 1569 } 1570 1571 /* 1572 * Perform upcall-related bw measuring. 1573 */ 1574 if (rt->mfc_bw_meter != NULL) { 1575 struct bw_meter *x; 1576 struct timeval now; 1577 1578 GET_TIME(now); 1579 for (x = rt->mfc_bw_meter; x != NULL; x = x->bm_mfc_next) 1580 bw_meter_receive_packet(x, plen, &now); 1581 } 1582 1583 return 0; 1584 } 1585 1586 /* 1587 * check if a vif number is legal/ok. This is used by ip_output. 1588 */ 1589 static int 1590 X_legal_vif_num(int vif) 1591 { 1592 return (vif >= 0 && vif < numvifs); 1593 } 1594 1595 /* 1596 * Return the local address used by this vif 1597 */ 1598 static u_long 1599 X_ip_mcast_src(int vifi) 1600 { 1601 if (vifi >= 0 && vifi < numvifs) 1602 return viftable[vifi].v_lcl_addr.s_addr; 1603 else 1604 return INADDR_ANY; 1605 } 1606 1607 static void 1608 phyint_send(struct ip *ip, struct vif *vifp, struct mbuf *m) 1609 { 1610 struct mbuf *mb_copy; 1611 int hlen = ip->ip_hl << 2; 1612 1613 /* 1614 * Make a new reference to the packet; make sure that 1615 * the IP header is actually copied, not just referenced, 1616 * so that ip_output() only scribbles on the copy. 1617 */ 1618 mb_copy = m_copypacket(m, M_DONTWAIT); 1619 if (mb_copy && (M_HASCL(mb_copy) || mb_copy->m_len < hlen)) 1620 mb_copy = m_pullup(mb_copy, hlen); 1621 if (mb_copy == NULL) 1622 return; 1623 1624 if (vifp->v_rate_limit == 0) 1625 tbf_send_packet(vifp, mb_copy); 1626 else 1627 tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len); 1628 } 1629 1630 static void 1631 encap_send(struct ip *ip, struct vif *vifp, struct mbuf *m) 1632 { 1633 struct mbuf *mb_copy; 1634 struct ip *ip_copy; 1635 int i, len = ip->ip_len; 1636 1637 /* Take care of delayed checksums */ 1638 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 1639 in_delayed_cksum(m); 1640 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 1641 } 1642 1643 /* 1644 * copy the old packet & pullup its IP header into the 1645 * new mbuf so we can modify it. Try to fill the new 1646 * mbuf since if we don't the ethernet driver will. 1647 */ 1648 MGETHDR(mb_copy, M_DONTWAIT, MT_HEADER); 1649 if (mb_copy == NULL) 1650 return; 1651 mb_copy->m_data += max_linkhdr; 1652 mb_copy->m_len = sizeof(multicast_encap_iphdr); 1653 1654 if ((mb_copy->m_next = m_copypacket(m, M_DONTWAIT)) == NULL) { 1655 m_freem(mb_copy); 1656 return; 1657 } 1658 i = MHLEN - M_LEADINGSPACE(mb_copy); 1659 if (i > len) 1660 i = len; 1661 mb_copy = m_pullup(mb_copy, i); 1662 if (mb_copy == NULL) 1663 return; 1664 mb_copy->m_pkthdr.len = len + sizeof(multicast_encap_iphdr); 1665 1666 /* 1667 * fill in the encapsulating IP header. 1668 */ 1669 ip_copy = mtod(mb_copy, struct ip *); 1670 *ip_copy = multicast_encap_iphdr; 1671 #ifdef RANDOM_IP_ID 1672 ip_copy->ip_id = ip_randomid(); 1673 #else 1674 ip_copy->ip_id = htons(ip_id++); 1675 #endif 1676 ip_copy->ip_len += len; 1677 ip_copy->ip_src = vifp->v_lcl_addr; 1678 ip_copy->ip_dst = vifp->v_rmt_addr; 1679 1680 /* 1681 * turn the encapsulated IP header back into a valid one. 1682 */ 1683 ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr)); 1684 --ip->ip_ttl; 1685 ip->ip_len = htons(ip->ip_len); 1686 ip->ip_off = htons(ip->ip_off); 1687 ip->ip_sum = 0; 1688 mb_copy->m_data += sizeof(multicast_encap_iphdr); 1689 ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2); 1690 mb_copy->m_data -= sizeof(multicast_encap_iphdr); 1691 1692 if (vifp->v_rate_limit == 0) 1693 tbf_send_packet(vifp, mb_copy); 1694 else 1695 tbf_control(vifp, mb_copy, ip, ip_copy->ip_len); 1696 } 1697 1698 /* 1699 * De-encapsulate a packet and feed it back through ip input (this 1700 * routine is called whenever IP gets a packet with proto type 1701 * ENCAP_PROTO and a local destination address). 1702 * 1703 * This is similar to mroute_encapcheck() + mroute_encap_input() in -current. 1704 */ 1705 static void 1706 X_ipip_input(struct mbuf *m, int off, int proto) 1707 { 1708 struct ip *ip = mtod(m, struct ip *); 1709 int hlen = ip->ip_hl << 2; 1710 int s; 1711 struct ifqueue *ifq; 1712 1713 if (!have_encap_tunnel) { 1714 rip_input(m, off, proto); 1715 return; 1716 } 1717 /* 1718 * dump the packet if it's not to a multicast destination or if 1719 * we don't have an encapsulating tunnel with the source. 1720 * Note: This code assumes that the remote site IP address 1721 * uniquely identifies the tunnel (i.e., that this site has 1722 * at most one tunnel with the remote site). 1723 */ 1724 if (!IN_MULTICAST(ntohl(((struct ip *)((char *)ip+hlen))->ip_dst.s_addr))) { 1725 ++mrtstat.mrts_bad_tunnel; 1726 m_freem(m); 1727 return; 1728 } 1729 if (ip->ip_src.s_addr != last_encap_src) { 1730 struct vif *vifp = viftable; 1731 struct vif *vife = vifp + numvifs; 1732 1733 last_encap_src = ip->ip_src.s_addr; 1734 last_encap_vif = NULL; 1735 for ( ; vifp < vife; ++vifp) 1736 if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) { 1737 if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT)) 1738 == VIFF_TUNNEL) 1739 last_encap_vif = vifp; 1740 break; 1741 } 1742 } 1743 if (last_encap_vif == NULL) { 1744 last_encap_src = INADDR_ANY; 1745 mrtstat.mrts_cant_tunnel++; /*XXX*/ 1746 m_freem(m); 1747 if (mrtdebug) 1748 log(LOG_DEBUG, "ip_mforward: no tunnel with %lx\n", 1749 (u_long)ntohl(ip->ip_src.s_addr)); 1750 return; 1751 } 1752 1753 if (hlen > sizeof(struct ip)) 1754 ip_stripoptions(m, NULL); 1755 m->m_data += sizeof(struct ip); 1756 m->m_len -= sizeof(struct ip); 1757 m->m_pkthdr.len -= sizeof(struct ip); 1758 m->m_pkthdr.rcvif = last_encap_vif->v_ifp; 1759 1760 ifq = &ipintrq; 1761 s = splimp(); 1762 if (IF_QFULL(ifq)) { 1763 IF_DROP(ifq); 1764 m_freem(m); 1765 } else { 1766 IF_ENQUEUE(ifq, m); 1767 /* 1768 * normally we would need a "schednetisr(NETISR_IP)" 1769 * here but we were called by ip_input and it is going 1770 * to loop back & try to dequeue the packet we just 1771 * queued as soon as we return so we avoid the 1772 * unnecessary software interrrupt. 1773 */ 1774 } 1775 splx(s); 1776 } 1777 1778 /* 1779 * Token bucket filter module 1780 */ 1781 1782 static void 1783 tbf_control(struct vif *vifp, struct mbuf *m, struct ip *ip, u_long p_len) 1784 { 1785 struct tbf *t = vifp->v_tbf; 1786 1787 if (p_len > MAX_BKT_SIZE) { /* drop if packet is too large */ 1788 mrtstat.mrts_pkt2large++; 1789 m_freem(m); 1790 return; 1791 } 1792 1793 tbf_update_tokens(vifp); 1794 1795 if (t->tbf_q_len == 0) { /* queue empty... */ 1796 if (p_len <= t->tbf_n_tok) { /* send packet if enough tokens */ 1797 t->tbf_n_tok -= p_len; 1798 tbf_send_packet(vifp, m); 1799 } else { /* no, queue packet and try later */ 1800 tbf_queue(vifp, m); 1801 timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS); 1802 } 1803 } else if (t->tbf_q_len < t->tbf_max_q_len) { 1804 /* finite queue length, so queue pkts and process queue */ 1805 tbf_queue(vifp, m); 1806 tbf_process_q(vifp); 1807 } else { 1808 /* queue full, try to dq and queue and process */ 1809 if (!tbf_dq_sel(vifp, ip)) { 1810 mrtstat.mrts_q_overflow++; 1811 m_freem(m); 1812 } else { 1813 tbf_queue(vifp, m); 1814 tbf_process_q(vifp); 1815 } 1816 } 1817 } 1818 1819 /* 1820 * adds a packet to the queue at the interface 1821 */ 1822 static void 1823 tbf_queue(struct vif *vifp, struct mbuf *m) 1824 { 1825 int s = splnet(); 1826 struct tbf *t = vifp->v_tbf; 1827 1828 if (t->tbf_t == NULL) /* Queue was empty */ 1829 t->tbf_q = m; 1830 else /* Insert at tail */ 1831 t->tbf_t->m_act = m; 1832 1833 t->tbf_t = m; /* Set new tail pointer */ 1834 1835 #ifdef DIAGNOSTIC 1836 /* Make sure we didn't get fed a bogus mbuf */ 1837 if (m->m_act) 1838 panic("tbf_queue: m_act"); 1839 #endif 1840 m->m_act = NULL; 1841 1842 t->tbf_q_len++; 1843 1844 splx(s); 1845 } 1846 1847 /* 1848 * processes the queue at the interface 1849 */ 1850 static void 1851 tbf_process_q(struct vif *vifp) 1852 { 1853 int s = splnet(); 1854 struct tbf *t = vifp->v_tbf; 1855 1856 /* loop through the queue at the interface and send as many packets 1857 * as possible 1858 */ 1859 while (t->tbf_q_len > 0) { 1860 struct mbuf *m = t->tbf_q; 1861 int len = mtod(m, struct ip *)->ip_len; 1862 1863 /* determine if the packet can be sent */ 1864 if (len > t->tbf_n_tok) /* not enough tokens, we are done */ 1865 break; 1866 /* ok, reduce no of tokens, dequeue and send the packet. */ 1867 t->tbf_n_tok -= len; 1868 1869 t->tbf_q = m->m_act; 1870 if (--t->tbf_q_len == 0) 1871 t->tbf_t = NULL; 1872 1873 m->m_act = NULL; 1874 tbf_send_packet(vifp, m); 1875 } 1876 splx(s); 1877 } 1878 1879 static void 1880 tbf_reprocess_q(void *xvifp) 1881 { 1882 struct vif *vifp = xvifp; 1883 1884 if (ip_mrouter == NULL) 1885 return; 1886 tbf_update_tokens(vifp); 1887 tbf_process_q(vifp); 1888 if (vifp->v_tbf->tbf_q_len) 1889 timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS); 1890 } 1891 1892 /* function that will selectively discard a member of the queue 1893 * based on the precedence value and the priority 1894 */ 1895 static int 1896 tbf_dq_sel(struct vif *vifp, struct ip *ip) 1897 { 1898 int s = splnet(); 1899 u_int p; 1900 struct mbuf *m, *last; 1901 struct mbuf **np; 1902 struct tbf *t = vifp->v_tbf; 1903 1904 p = priority(vifp, ip); 1905 1906 np = &t->tbf_q; 1907 last = NULL; 1908 while ((m = *np) != NULL) { 1909 if (p > priority(vifp, mtod(m, struct ip *))) { 1910 *np = m->m_act; 1911 /* If we're removing the last packet, fix the tail pointer */ 1912 if (m == t->tbf_t) 1913 t->tbf_t = last; 1914 m_freem(m); 1915 /* It's impossible for the queue to be empty, but check anyways. */ 1916 if (--t->tbf_q_len == 0) 1917 t->tbf_t = NULL; 1918 splx(s); 1919 mrtstat.mrts_drop_sel++; 1920 return 1; 1921 } 1922 np = &m->m_act; 1923 last = m; 1924 } 1925 splx(s); 1926 return 0; 1927 } 1928 1929 static void 1930 tbf_send_packet(struct vif *vifp, struct mbuf *m) 1931 { 1932 int s = splnet(); 1933 1934 if (vifp->v_flags & VIFF_TUNNEL) /* If tunnel options */ 1935 ip_output(m, NULL, &vifp->v_route, IP_FORWARDING, NULL, NULL); 1936 else { 1937 struct ip_moptions imo; 1938 int error; 1939 static struct route ro; /* XXX check this */ 1940 1941 imo.imo_multicast_ifp = vifp->v_ifp; 1942 imo.imo_multicast_ttl = mtod(m, struct ip *)->ip_ttl - 1; 1943 imo.imo_multicast_loop = 1; 1944 imo.imo_multicast_vif = -1; 1945 1946 /* 1947 * Re-entrancy should not be a problem here, because 1948 * the packets that we send out and are looped back at us 1949 * should get rejected because they appear to come from 1950 * the loopback interface, thus preventing looping. 1951 */ 1952 error = ip_output(m, NULL, &ro, IP_FORWARDING, &imo, NULL); 1953 1954 if (mrtdebug & DEBUG_XMIT) 1955 log(LOG_DEBUG, "phyint_send on vif %d err %d\n", 1956 (int)(vifp - viftable), error); 1957 } 1958 splx(s); 1959 } 1960 1961 /* determine the current time and then 1962 * the elapsed time (between the last time and time now) 1963 * in milliseconds & update the no. of tokens in the bucket 1964 */ 1965 static void 1966 tbf_update_tokens(struct vif *vifp) 1967 { 1968 struct timeval tp; 1969 u_long tm; 1970 int s = splnet(); 1971 struct tbf *t = vifp->v_tbf; 1972 1973 GET_TIME(tp); 1974 1975 TV_DELTA(tp, t->tbf_last_pkt_t, tm); 1976 1977 /* 1978 * This formula is actually 1979 * "time in seconds" * "bytes/second". 1980 * 1981 * (tm / 1000000) * (v_rate_limit * 1000 * (1000/1024) / 8) 1982 * 1983 * The (1000/1024) was introduced in add_vif to optimize 1984 * this divide into a shift. 1985 */ 1986 t->tbf_n_tok += tm * vifp->v_rate_limit / 1024 / 8; 1987 t->tbf_last_pkt_t = tp; 1988 1989 if (t->tbf_n_tok > MAX_BKT_SIZE) 1990 t->tbf_n_tok = MAX_BKT_SIZE; 1991 1992 splx(s); 1993 } 1994 1995 static int 1996 priority(struct vif *vifp, struct ip *ip) 1997 { 1998 int prio = 50; /* the lowest priority -- default case */ 1999 2000 /* temporary hack; may add general packet classifier some day */ 2001 2002 /* 2003 * The UDP port space is divided up into four priority ranges: 2004 * [0, 16384) : unclassified - lowest priority 2005 * [16384, 32768) : audio - highest priority 2006 * [32768, 49152) : whiteboard - medium priority 2007 * [49152, 65536) : video - low priority 2008 * 2009 * Everything else gets lowest priority. 2010 */ 2011 if (ip->ip_p == IPPROTO_UDP) { 2012 struct udphdr *udp = (struct udphdr *)(((char *)ip) + (ip->ip_hl << 2)); 2013 switch (ntohs(udp->uh_dport) & 0xc000) { 2014 case 0x4000: 2015 prio = 70; 2016 break; 2017 case 0x8000: 2018 prio = 60; 2019 break; 2020 case 0xc000: 2021 prio = 55; 2022 break; 2023 } 2024 } 2025 return prio; 2026 } 2027 2028 /* 2029 * End of token bucket filter modifications 2030 */ 2031 2032 static int 2033 X_ip_rsvp_vif(struct socket *so, struct sockopt *sopt) 2034 { 2035 int error, vifi, s; 2036 2037 if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) 2038 return EOPNOTSUPP; 2039 2040 error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi); 2041 if (error) 2042 return error; 2043 2044 s = splnet(); 2045 2046 if (vifi < 0 || vifi >= numvifs) { /* Error if vif is invalid */ 2047 splx(s); 2048 return EADDRNOTAVAIL; 2049 } 2050 2051 if (sopt->sopt_name == IP_RSVP_VIF_ON) { 2052 /* Check if socket is available. */ 2053 if (viftable[vifi].v_rsvpd != NULL) { 2054 splx(s); 2055 return EADDRINUSE; 2056 } 2057 2058 viftable[vifi].v_rsvpd = so; 2059 /* This may seem silly, but we need to be sure we don't over-increment 2060 * the RSVP counter, in case something slips up. 2061 */ 2062 if (!viftable[vifi].v_rsvp_on) { 2063 viftable[vifi].v_rsvp_on = 1; 2064 rsvp_on++; 2065 } 2066 } else { /* must be VIF_OFF */ 2067 /* 2068 * XXX as an additional consistency check, one could make sure 2069 * that viftable[vifi].v_rsvpd == so, otherwise passing so as 2070 * first parameter is pretty useless. 2071 */ 2072 viftable[vifi].v_rsvpd = NULL; 2073 /* 2074 * This may seem silly, but we need to be sure we don't over-decrement 2075 * the RSVP counter, in case something slips up. 2076 */ 2077 if (viftable[vifi].v_rsvp_on) { 2078 viftable[vifi].v_rsvp_on = 0; 2079 rsvp_on--; 2080 } 2081 } 2082 splx(s); 2083 return 0; 2084 } 2085 2086 static void 2087 X_ip_rsvp_force_done(struct socket *so) 2088 { 2089 int vifi; 2090 int s; 2091 2092 /* Don't bother if it is not the right type of socket. */ 2093 if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) 2094 return; 2095 2096 s = splnet(); 2097 2098 /* The socket may be attached to more than one vif...this 2099 * is perfectly legal. 2100 */ 2101 for (vifi = 0; vifi < numvifs; vifi++) { 2102 if (viftable[vifi].v_rsvpd == so) { 2103 viftable[vifi].v_rsvpd = NULL; 2104 /* This may seem silly, but we need to be sure we don't 2105 * over-decrement the RSVP counter, in case something slips up. 2106 */ 2107 if (viftable[vifi].v_rsvp_on) { 2108 viftable[vifi].v_rsvp_on = 0; 2109 rsvp_on--; 2110 } 2111 } 2112 } 2113 2114 splx(s); 2115 } 2116 2117 static void 2118 X_rsvp_input(struct mbuf *m, int off, int proto) 2119 { 2120 int vifi; 2121 struct ip *ip = mtod(m, struct ip *); 2122 struct sockaddr_in rsvp_src = { sizeof rsvp_src, AF_INET }; 2123 int s; 2124 struct ifnet *ifp; 2125 2126 if (rsvpdebug) 2127 printf("rsvp_input: rsvp_on %d\n",rsvp_on); 2128 2129 /* Can still get packets with rsvp_on = 0 if there is a local member 2130 * of the group to which the RSVP packet is addressed. But in this 2131 * case we want to throw the packet away. 2132 */ 2133 if (!rsvp_on) { 2134 m_freem(m); 2135 return; 2136 } 2137 2138 s = splnet(); 2139 2140 if (rsvpdebug) 2141 printf("rsvp_input: check vifs\n"); 2142 2143 #ifdef DIAGNOSTIC 2144 if (!(m->m_flags & M_PKTHDR)) 2145 panic("rsvp_input no hdr"); 2146 #endif 2147 2148 ifp = m->m_pkthdr.rcvif; 2149 /* Find which vif the packet arrived on. */ 2150 for (vifi = 0; vifi < numvifs; vifi++) 2151 if (viftable[vifi].v_ifp == ifp) 2152 break; 2153 2154 if (vifi == numvifs || viftable[vifi].v_rsvpd == NULL) { 2155 /* 2156 * If the old-style non-vif-associated socket is set, 2157 * then use it. Otherwise, drop packet since there 2158 * is no specific socket for this vif. 2159 */ 2160 if (ip_rsvpd != NULL) { 2161 if (rsvpdebug) 2162 printf("rsvp_input: Sending packet up old-style socket\n"); 2163 rip_input(m, off, proto); /* xxx */ 2164 } else { 2165 if (rsvpdebug && vifi == numvifs) 2166 printf("rsvp_input: Can't find vif for packet.\n"); 2167 else if (rsvpdebug && viftable[vifi].v_rsvpd == NULL) 2168 printf("rsvp_input: No socket defined for vif %d\n",vifi); 2169 m_freem(m); 2170 } 2171 splx(s); 2172 return; 2173 } 2174 rsvp_src.sin_addr = ip->ip_src; 2175 2176 if (rsvpdebug && m) 2177 printf("rsvp_input: m->m_len = %d, sbspace() = %ld\n", 2178 m->m_len,sbspace(&(viftable[vifi].v_rsvpd->so_rcv))); 2179 2180 if (socket_send(viftable[vifi].v_rsvpd, m, &rsvp_src) < 0) { 2181 if (rsvpdebug) 2182 printf("rsvp_input: Failed to append to socket\n"); 2183 } else { 2184 if (rsvpdebug) 2185 printf("rsvp_input: send packet up\n"); 2186 } 2187 2188 splx(s); 2189 } 2190 2191 /* 2192 * Code for bandwidth monitors 2193 */ 2194 2195 /* 2196 * Define common interface for timeval-related methods 2197 */ 2198 #define BW_TIMEVALCMP(tvp, uvp, cmp) timevalcmp((tvp), (uvp), cmp) 2199 #define BW_TIMEVALDECR(vvp, uvp) timevalsub((vvp), (uvp)) 2200 #define BW_TIMEVALADD(vvp, uvp) timevaladd((vvp), (uvp)) 2201 2202 static uint32_t 2203 compute_bw_meter_flags(struct bw_upcall *req) 2204 { 2205 uint32_t flags = 0; 2206 2207 if (req->bu_flags & BW_UPCALL_UNIT_PACKETS) 2208 flags |= BW_METER_UNIT_PACKETS; 2209 if (req->bu_flags & BW_UPCALL_UNIT_BYTES) 2210 flags |= BW_METER_UNIT_BYTES; 2211 if (req->bu_flags & BW_UPCALL_GEQ) 2212 flags |= BW_METER_GEQ; 2213 if (req->bu_flags & BW_UPCALL_LEQ) 2214 flags |= BW_METER_LEQ; 2215 2216 return flags; 2217 } 2218 2219 /* 2220 * Add a bw_meter entry 2221 */ 2222 static int 2223 add_bw_upcall(struct bw_upcall *req) 2224 { 2225 struct mfc *mfc; 2226 struct timeval delta = { BW_UPCALL_THRESHOLD_INTERVAL_MIN_SEC, 2227 BW_UPCALL_THRESHOLD_INTERVAL_MIN_USEC }; 2228 struct timeval now; 2229 struct bw_meter *x; 2230 uint32_t flags; 2231 int s; 2232 2233 if (!(mrt_api_config & MRT_MFC_BW_UPCALL)) 2234 return EOPNOTSUPP; 2235 2236 /* Test if the flags are valid */ 2237 if (!(req->bu_flags & (BW_UPCALL_UNIT_PACKETS | BW_UPCALL_UNIT_BYTES))) 2238 return EINVAL; 2239 if (!(req->bu_flags & (BW_UPCALL_GEQ | BW_UPCALL_LEQ))) 2240 return EINVAL; 2241 if ((req->bu_flags & (BW_UPCALL_GEQ | BW_UPCALL_LEQ)) 2242 == (BW_UPCALL_GEQ | BW_UPCALL_LEQ)) 2243 return EINVAL; 2244 2245 /* Test if the threshold time interval is valid */ 2246 if (BW_TIMEVALCMP(&req->bu_threshold.b_time, &delta, <)) 2247 return EINVAL; 2248 2249 flags = compute_bw_meter_flags(req); 2250 2251 /* 2252 * Find if we have already same bw_meter entry 2253 */ 2254 s = splnet(); 2255 mfc = mfc_find(req->bu_src.s_addr, req->bu_dst.s_addr); 2256 if (mfc == NULL) { 2257 splx(s); 2258 return EADDRNOTAVAIL; 2259 } 2260 for (x = mfc->mfc_bw_meter; x != NULL; x = x->bm_mfc_next) { 2261 if ((BW_TIMEVALCMP(&x->bm_threshold.b_time, 2262 &req->bu_threshold.b_time, ==)) && 2263 (x->bm_threshold.b_packets == req->bu_threshold.b_packets) && 2264 (x->bm_threshold.b_bytes == req->bu_threshold.b_bytes) && 2265 (x->bm_flags & BW_METER_USER_FLAGS) == flags) { 2266 splx(s); 2267 return 0; /* XXX Already installed */ 2268 } 2269 } 2270 splx(s); 2271 2272 /* Allocate the new bw_meter entry */ 2273 x = (struct bw_meter *)malloc(sizeof(*x), M_BWMETER, M_NOWAIT); 2274 if (x == NULL) 2275 return ENOBUFS; 2276 2277 /* Set the new bw_meter entry */ 2278 x->bm_threshold.b_time = req->bu_threshold.b_time; 2279 GET_TIME(now); 2280 x->bm_start_time = now; 2281 x->bm_threshold.b_packets = req->bu_threshold.b_packets; 2282 x->bm_threshold.b_bytes = req->bu_threshold.b_bytes; 2283 x->bm_measured.b_packets = 0; 2284 x->bm_measured.b_bytes = 0; 2285 x->bm_flags = flags; 2286 x->bm_time_next = NULL; 2287 x->bm_time_hash = BW_METER_BUCKETS; 2288 2289 /* Add the new bw_meter entry to the front of entries for this MFC */ 2290 s = splnet(); 2291 x->bm_mfc = mfc; 2292 x->bm_mfc_next = mfc->mfc_bw_meter; 2293 mfc->mfc_bw_meter = x; 2294 schedule_bw_meter(x, &now); 2295 splx(s); 2296 2297 return 0; 2298 } 2299 2300 static void 2301 free_bw_list(struct bw_meter *list) 2302 { 2303 while (list != NULL) { 2304 struct bw_meter *x = list; 2305 2306 list = list->bm_mfc_next; 2307 unschedule_bw_meter(x); 2308 free(x, M_BWMETER); 2309 } 2310 } 2311 2312 /* 2313 * Delete one or multiple bw_meter entries 2314 */ 2315 static int 2316 del_bw_upcall(struct bw_upcall *req) 2317 { 2318 struct mfc *mfc; 2319 struct bw_meter *x; 2320 int s; 2321 2322 if (!(mrt_api_config & MRT_MFC_BW_UPCALL)) 2323 return EOPNOTSUPP; 2324 2325 s = splnet(); 2326 /* Find the corresponding MFC entry */ 2327 mfc = mfc_find(req->bu_src.s_addr, req->bu_dst.s_addr); 2328 if (mfc == NULL) { 2329 splx(s); 2330 return EADDRNOTAVAIL; 2331 } else if (req->bu_flags & BW_UPCALL_DELETE_ALL) { 2332 /* 2333 * Delete all bw_meter entries for this mfc 2334 */ 2335 struct bw_meter *list; 2336 2337 list = mfc->mfc_bw_meter; 2338 mfc->mfc_bw_meter = NULL; 2339 splx(s); 2340 free_bw_list(list); 2341 return 0; 2342 } else { /* Delete a single bw_meter entry */ 2343 struct bw_meter *prev; 2344 uint32_t flags = 0; 2345 2346 flags = compute_bw_meter_flags(req); 2347 2348 /* Find the bw_meter entry to delete */ 2349 for (prev = NULL, x = mfc->mfc_bw_meter; x != NULL; 2350 prev = x, x = x->bm_mfc_next) { 2351 if ((BW_TIMEVALCMP(&x->bm_threshold.b_time, 2352 &req->bu_threshold.b_time, ==)) && 2353 (x->bm_threshold.b_packets == req->bu_threshold.b_packets) && 2354 (x->bm_threshold.b_bytes == req->bu_threshold.b_bytes) && 2355 (x->bm_flags & BW_METER_USER_FLAGS) == flags) 2356 break; 2357 } 2358 if (x != NULL) { /* Delete entry from the list for this MFC */ 2359 if (prev != NULL) 2360 prev->bm_mfc_next = x->bm_mfc_next; /* remove from middle*/ 2361 else 2362 x->bm_mfc->mfc_bw_meter = x->bm_mfc_next;/* new head of list */ 2363 splx(s); 2364 2365 unschedule_bw_meter(x); 2366 /* Free the bw_meter entry */ 2367 free(x, M_BWMETER); 2368 return 0; 2369 } else { 2370 splx(s); 2371 return EINVAL; 2372 } 2373 } 2374 /* NOTREACHED */ 2375 } 2376 2377 /* 2378 * Perform bandwidth measurement processing that may result in an upcall 2379 */ 2380 static void 2381 bw_meter_receive_packet(struct bw_meter *x, int plen, struct timeval *nowp) 2382 { 2383 struct timeval delta; 2384 int s; 2385 2386 s = splnet(); 2387 delta = *nowp; 2388 BW_TIMEVALDECR(&delta, &x->bm_start_time); 2389 2390 if (x->bm_flags & BW_METER_GEQ) { 2391 /* 2392 * Processing for ">=" type of bw_meter entry 2393 */ 2394 if (BW_TIMEVALCMP(&delta, &x->bm_threshold.b_time, >)) { 2395 /* Reset the bw_meter entry */ 2396 x->bm_start_time = *nowp; 2397 x->bm_measured.b_packets = 0; 2398 x->bm_measured.b_bytes = 0; 2399 x->bm_flags &= ~BW_METER_UPCALL_DELIVERED; 2400 } 2401 2402 /* Record that a packet is received */ 2403 x->bm_measured.b_packets++; 2404 x->bm_measured.b_bytes += plen; 2405 2406 /* 2407 * Test if we should deliver an upcall 2408 */ 2409 if (!(x->bm_flags & BW_METER_UPCALL_DELIVERED)) { 2410 if (((x->bm_flags & BW_METER_UNIT_PACKETS) && 2411 (x->bm_measured.b_packets >= x->bm_threshold.b_packets)) || 2412 ((x->bm_flags & BW_METER_UNIT_BYTES) && 2413 (x->bm_measured.b_bytes >= x->bm_threshold.b_bytes))) { 2414 /* Prepare an upcall for delivery */ 2415 bw_meter_prepare_upcall(x, nowp); 2416 x->bm_flags |= BW_METER_UPCALL_DELIVERED; 2417 } 2418 } 2419 } else if (x->bm_flags & BW_METER_LEQ) { 2420 /* 2421 * Processing for "<=" type of bw_meter entry 2422 */ 2423 if (BW_TIMEVALCMP(&delta, &x->bm_threshold.b_time, >)) { 2424 /* 2425 * We are behind time with the multicast forwarding table 2426 * scanning for "<=" type of bw_meter entries, so test now 2427 * if we should deliver an upcall. 2428 */ 2429 if (((x->bm_flags & BW_METER_UNIT_PACKETS) && 2430 (x->bm_measured.b_packets <= x->bm_threshold.b_packets)) || 2431 ((x->bm_flags & BW_METER_UNIT_BYTES) && 2432 (x->bm_measured.b_bytes <= x->bm_threshold.b_bytes))) { 2433 /* Prepare an upcall for delivery */ 2434 bw_meter_prepare_upcall(x, nowp); 2435 } 2436 /* Reschedule the bw_meter entry */ 2437 unschedule_bw_meter(x); 2438 schedule_bw_meter(x, nowp); 2439 } 2440 2441 /* Record that a packet is received */ 2442 x->bm_measured.b_packets++; 2443 x->bm_measured.b_bytes += plen; 2444 2445 /* 2446 * Test if we should restart the measuring interval 2447 */ 2448 if ((x->bm_flags & BW_METER_UNIT_PACKETS && 2449 x->bm_measured.b_packets <= x->bm_threshold.b_packets) || 2450 (x->bm_flags & BW_METER_UNIT_BYTES && 2451 x->bm_measured.b_bytes <= x->bm_threshold.b_bytes)) { 2452 /* Don't restart the measuring interval */ 2453 } else { 2454 /* Do restart the measuring interval */ 2455 /* 2456 * XXX: note that we don't unschedule and schedule, because this 2457 * might be too much overhead per packet. Instead, when we process 2458 * all entries for a given timer hash bin, we check whether it is 2459 * really a timeout. If not, we reschedule at that time. 2460 */ 2461 x->bm_start_time = *nowp; 2462 x->bm_measured.b_packets = 0; 2463 x->bm_measured.b_bytes = 0; 2464 x->bm_flags &= ~BW_METER_UPCALL_DELIVERED; 2465 } 2466 } 2467 splx(s); 2468 } 2469 2470 /* 2471 * Prepare a bandwidth-related upcall 2472 */ 2473 static void 2474 bw_meter_prepare_upcall(struct bw_meter *x, struct timeval *nowp) 2475 { 2476 struct timeval delta; 2477 struct bw_upcall *u; 2478 int s; 2479 2480 s = splnet(); 2481 2482 /* 2483 * Compute the measured time interval 2484 */ 2485 delta = *nowp; 2486 BW_TIMEVALDECR(&delta, &x->bm_start_time); 2487 2488 /* 2489 * If there are too many pending upcalls, deliver them now 2490 */ 2491 if (bw_upcalls_n >= BW_UPCALLS_MAX) 2492 bw_upcalls_send(); 2493 2494 /* 2495 * Set the bw_upcall entry 2496 */ 2497 u = &bw_upcalls[bw_upcalls_n++]; 2498 u->bu_src = x->bm_mfc->mfc_origin; 2499 u->bu_dst = x->bm_mfc->mfc_mcastgrp; 2500 u->bu_threshold.b_time = x->bm_threshold.b_time; 2501 u->bu_threshold.b_packets = x->bm_threshold.b_packets; 2502 u->bu_threshold.b_bytes = x->bm_threshold.b_bytes; 2503 u->bu_measured.b_time = delta; 2504 u->bu_measured.b_packets = x->bm_measured.b_packets; 2505 u->bu_measured.b_bytes = x->bm_measured.b_bytes; 2506 u->bu_flags = 0; 2507 if (x->bm_flags & BW_METER_UNIT_PACKETS) 2508 u->bu_flags |= BW_UPCALL_UNIT_PACKETS; 2509 if (x->bm_flags & BW_METER_UNIT_BYTES) 2510 u->bu_flags |= BW_UPCALL_UNIT_BYTES; 2511 if (x->bm_flags & BW_METER_GEQ) 2512 u->bu_flags |= BW_UPCALL_GEQ; 2513 if (x->bm_flags & BW_METER_LEQ) 2514 u->bu_flags |= BW_UPCALL_LEQ; 2515 2516 splx(s); 2517 } 2518 2519 /* 2520 * Send the pending bandwidth-related upcalls 2521 */ 2522 static void 2523 bw_upcalls_send(void) 2524 { 2525 struct mbuf *m; 2526 int len = bw_upcalls_n * sizeof(bw_upcalls[0]); 2527 struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; 2528 static struct igmpmsg igmpmsg = { 0, /* unused1 */ 2529 0, /* unused2 */ 2530 IGMPMSG_BW_UPCALL,/* im_msgtype */ 2531 0, /* im_mbz */ 2532 0, /* im_vif */ 2533 0, /* unused3 */ 2534 { 0 }, /* im_src */ 2535 { 0 } }; /* im_dst */ 2536 2537 if (bw_upcalls_n == 0) 2538 return; /* No pending upcalls */ 2539 2540 bw_upcalls_n = 0; 2541 2542 /* 2543 * Allocate a new mbuf, initialize it with the header and 2544 * the payload for the pending calls. 2545 */ 2546 MGETHDR(m, M_DONTWAIT, MT_HEADER); 2547 if (m == NULL) { 2548 log(LOG_WARNING, "bw_upcalls_send: cannot allocate mbuf\n"); 2549 return; 2550 } 2551 2552 m->m_len = m->m_pkthdr.len = 0; 2553 m_copyback(m, 0, sizeof(struct igmpmsg), (caddr_t)&igmpmsg); 2554 m_copyback(m, sizeof(struct igmpmsg), len, (caddr_t)&bw_upcalls[0]); 2555 2556 /* 2557 * Send the upcalls 2558 * XXX do we need to set the address in k_igmpsrc ? 2559 */ 2560 mrtstat.mrts_upcalls++; 2561 if (socket_send(ip_mrouter, m, &k_igmpsrc) < 0) { 2562 log(LOG_WARNING, "bw_upcalls_send: ip_mrouter socket queue full\n"); 2563 ++mrtstat.mrts_upq_sockfull; 2564 } 2565 } 2566 2567 /* 2568 * Compute the timeout hash value for the bw_meter entries 2569 */ 2570 #define BW_METER_TIMEHASH(bw_meter, hash) \ 2571 do { \ 2572 struct timeval next_timeval = (bw_meter)->bm_start_time; \ 2573 \ 2574 BW_TIMEVALADD(&next_timeval, &(bw_meter)->bm_threshold.b_time); \ 2575 (hash) = next_timeval.tv_sec; \ 2576 if (next_timeval.tv_usec) \ 2577 (hash)++; /* XXX: make sure we don't timeout early */ \ 2578 (hash) %= BW_METER_BUCKETS; \ 2579 } while (0) 2580 2581 /* 2582 * Schedule a timer to process periodically bw_meter entry of type "<=" 2583 * by linking the entry in the proper hash bucket. 2584 */ 2585 static void 2586 schedule_bw_meter(struct bw_meter *x, struct timeval *nowp) 2587 { 2588 int time_hash, s; 2589 2590 if (!(x->bm_flags & BW_METER_LEQ)) 2591 return; /* XXX: we schedule timers only for "<=" entries */ 2592 2593 /* 2594 * Reset the bw_meter entry 2595 */ 2596 s = splnet(); 2597 x->bm_start_time = *nowp; 2598 x->bm_measured.b_packets = 0; 2599 x->bm_measured.b_bytes = 0; 2600 x->bm_flags &= ~BW_METER_UPCALL_DELIVERED; 2601 splx(s); 2602 2603 /* 2604 * Compute the timeout hash value and insert the entry 2605 */ 2606 BW_METER_TIMEHASH(x, time_hash); 2607 x->bm_time_next = bw_meter_timers[time_hash]; 2608 bw_meter_timers[time_hash] = x; 2609 x->bm_time_hash = time_hash; 2610 } 2611 2612 /* 2613 * Unschedule the periodic timer that processes bw_meter entry of type "<=" 2614 * by removing the entry from the proper hash bucket. 2615 */ 2616 static void 2617 unschedule_bw_meter(struct bw_meter *x) 2618 { 2619 int time_hash; 2620 struct bw_meter *prev, *tmp; 2621 2622 if (!(x->bm_flags & BW_METER_LEQ)) 2623 return; /* XXX: we schedule timers only for "<=" entries */ 2624 2625 /* 2626 * Compute the timeout hash value and delete the entry 2627 */ 2628 time_hash = x->bm_time_hash; 2629 if (time_hash >= BW_METER_BUCKETS) 2630 return; /* Entry was not scheduled */ 2631 2632 for (prev = NULL, tmp = bw_meter_timers[time_hash]; 2633 tmp != NULL; prev = tmp, tmp = tmp->bm_time_next) 2634 if (tmp == x) 2635 break; 2636 2637 if (tmp == NULL) 2638 panic("unschedule_bw_meter: bw_meter entry not found"); 2639 2640 if (prev != NULL) 2641 prev->bm_time_next = x->bm_time_next; 2642 else 2643 bw_meter_timers[time_hash] = x->bm_time_next; 2644 2645 x->bm_time_next = NULL; 2646 x->bm_time_hash = BW_METER_BUCKETS; 2647 } 2648 2649 2650 /* 2651 * Process all "<=" type of bw_meter that should be processed now, 2652 * and for each entry prepare an upcall if necessary. Each processed 2653 * entry is rescheduled again for the (periodic) processing. 2654 * 2655 * This is run periodically (once per second normally). On each round, 2656 * all the potentially matching entries are in the hash slot that we are 2657 * looking at. 2658 */ 2659 static void 2660 bw_meter_process() 2661 { 2662 static uint32_t last_tv_sec; /* last time we processed this */ 2663 2664 uint32_t loops; 2665 int i, s; 2666 struct timeval now, process_endtime; 2667 2668 GET_TIME(now); 2669 if (last_tv_sec == now.tv_sec) 2670 return; /* nothing to do */ 2671 2672 s = splnet(); 2673 loops = now.tv_sec - last_tv_sec; 2674 last_tv_sec = now.tv_sec; 2675 if (loops > BW_METER_BUCKETS) 2676 loops = BW_METER_BUCKETS; 2677 2678 /* 2679 * Process all bins of bw_meter entries from the one after the last 2680 * processed to the current one. On entry, i points to the last bucket 2681 * visited, so we need to increment i at the beginning of the loop. 2682 */ 2683 for (i = (now.tv_sec - loops) % BW_METER_BUCKETS; loops > 0; loops--) { 2684 struct bw_meter *x, *tmp_list; 2685 2686 if (++i >= BW_METER_BUCKETS) 2687 i = 0; 2688 2689 /* Disconnect the list of bw_meter entries from the bin */ 2690 tmp_list = bw_meter_timers[i]; 2691 bw_meter_timers[i] = NULL; 2692 2693 /* Process the list of bw_meter entries */ 2694 while (tmp_list != NULL) { 2695 x = tmp_list; 2696 tmp_list = tmp_list->bm_time_next; 2697 2698 /* Test if the time interval is over */ 2699 process_endtime = x->bm_start_time; 2700 BW_TIMEVALADD(&process_endtime, &x->bm_threshold.b_time); 2701 if (BW_TIMEVALCMP(&process_endtime, &now, >)) { 2702 /* Not yet: reschedule, but don't reset */ 2703 int time_hash; 2704 2705 BW_METER_TIMEHASH(x, time_hash); 2706 if (time_hash == i && process_endtime.tv_sec == now.tv_sec) { 2707 /* 2708 * XXX: somehow the bin processing is a bit ahead of time. 2709 * Put the entry in the next bin. 2710 */ 2711 if (++time_hash >= BW_METER_BUCKETS) 2712 time_hash = 0; 2713 } 2714 x->bm_time_next = bw_meter_timers[time_hash]; 2715 bw_meter_timers[time_hash] = x; 2716 x->bm_time_hash = time_hash; 2717 2718 continue; 2719 } 2720 2721 /* 2722 * Test if we should deliver an upcall 2723 */ 2724 if (((x->bm_flags & BW_METER_UNIT_PACKETS) && 2725 (x->bm_measured.b_packets <= x->bm_threshold.b_packets)) || 2726 ((x->bm_flags & BW_METER_UNIT_BYTES) && 2727 (x->bm_measured.b_bytes <= x->bm_threshold.b_bytes))) { 2728 /* Prepare an upcall for delivery */ 2729 bw_meter_prepare_upcall(x, &now); 2730 } 2731 2732 /* 2733 * Reschedule for next processing 2734 */ 2735 schedule_bw_meter(x, &now); 2736 } 2737 } 2738 splx(s); 2739 2740 /* Send all upcalls that are pending delivery */ 2741 bw_upcalls_send(); 2742 } 2743 2744 /* 2745 * A periodic function for sending all upcalls that are pending delivery 2746 */ 2747 static void 2748 expire_bw_upcalls_send(void *unused) 2749 { 2750 bw_upcalls_send(); 2751 2752 bw_upcalls_ch = timeout(expire_bw_upcalls_send, NULL, BW_UPCALLS_PERIOD); 2753 } 2754 2755 /* 2756 * A periodic function for periodic scanning of the multicast forwarding 2757 * table for processing all "<=" bw_meter entries. 2758 */ 2759 static void 2760 expire_bw_meter_process(void *unused) 2761 { 2762 if (mrt_api_config & MRT_MFC_BW_UPCALL) 2763 bw_meter_process(); 2764 2765 bw_meter_ch = timeout(expire_bw_meter_process, NULL, BW_METER_PERIOD); 2766 } 2767 2768 /* 2769 * End of bandwidth monitoring code 2770 */ 2771 2772 #ifdef PIM 2773 /* 2774 * Send the packet up to the user daemon, or eventually do kernel encapsulation 2775 * 2776 */ 2777 static int 2778 pim_register_send(struct ip *ip, struct vif *vifp, 2779 struct mbuf *m, struct mfc *rt) 2780 { 2781 struct mbuf *mb_copy, *mm; 2782 2783 if (mrtdebug & DEBUG_PIM) 2784 log(LOG_DEBUG, "pim_register_send: "); 2785 2786 mb_copy = pim_register_prepare(ip, m); 2787 if (mb_copy == NULL) 2788 return ENOBUFS; 2789 2790 /* 2791 * Send all the fragments. Note that the mbuf for each fragment 2792 * is freed by the sending machinery. 2793 */ 2794 for (mm = mb_copy; mm; mm = mb_copy) { 2795 mb_copy = mm->m_nextpkt; 2796 mm->m_nextpkt = 0; 2797 mm = m_pullup(mm, sizeof(struct ip)); 2798 if (mm != NULL) { 2799 ip = mtod(mm, struct ip *); 2800 if ((mrt_api_config & MRT_MFC_RP) && 2801 (rt->mfc_rp.s_addr != INADDR_ANY)) { 2802 pim_register_send_rp(ip, vifp, mm, rt); 2803 } else { 2804 pim_register_send_upcall(ip, vifp, mm, rt); 2805 } 2806 } 2807 } 2808 2809 return 0; 2810 } 2811 2812 /* 2813 * Return a copy of the data packet that is ready for PIM Register 2814 * encapsulation. 2815 * XXX: Note that in the returned copy the IP header is a valid one. 2816 */ 2817 static struct mbuf * 2818 pim_register_prepare(struct ip *ip, struct mbuf *m) 2819 { 2820 struct mbuf *mb_copy = NULL; 2821 int mtu; 2822 2823 /* Take care of delayed checksums */ 2824 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 2825 in_delayed_cksum(m); 2826 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 2827 } 2828 2829 /* 2830 * Copy the old packet & pullup its IP header into the 2831 * new mbuf so we can modify it. 2832 */ 2833 mb_copy = m_copypacket(m, M_DONTWAIT); 2834 if (mb_copy == NULL) 2835 return NULL; 2836 mb_copy = m_pullup(mb_copy, ip->ip_hl << 2); 2837 if (mb_copy == NULL) 2838 return NULL; 2839 2840 /* take care of the TTL */ 2841 ip = mtod(mb_copy, struct ip *); 2842 --ip->ip_ttl; 2843 2844 /* Compute the MTU after the PIM Register encapsulation */ 2845 mtu = 0xffff - sizeof(pim_encap_iphdr) - sizeof(pim_encap_pimhdr); 2846 2847 if (ip->ip_len <= mtu) { 2848 /* Turn the IP header into a valid one */ 2849 ip->ip_len = htons(ip->ip_len); 2850 ip->ip_off = htons(ip->ip_off); 2851 ip->ip_sum = 0; 2852 ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2); 2853 } else { 2854 /* Fragment the packet */ 2855 if (ip_fragment(ip, &mb_copy, mtu, 0, CSUM_DELAY_IP) != 0) { 2856 m_freem(mb_copy); 2857 return NULL; 2858 } 2859 } 2860 return mb_copy; 2861 } 2862 2863 /* 2864 * Send an upcall with the data packet to the user-level process. 2865 */ 2866 static int 2867 pim_register_send_upcall(struct ip *ip, struct vif *vifp, 2868 struct mbuf *mb_copy, struct mfc *rt) 2869 { 2870 struct mbuf *mb_first; 2871 int len = ntohs(ip->ip_len); 2872 struct igmpmsg *im; 2873 struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; 2874 2875 /* 2876 * Add a new mbuf with an upcall header 2877 */ 2878 MGETHDR(mb_first, M_DONTWAIT, MT_HEADER); 2879 if (mb_first == NULL) { 2880 m_freem(mb_copy); 2881 return ENOBUFS; 2882 } 2883 mb_first->m_data += max_linkhdr; 2884 mb_first->m_pkthdr.len = len + sizeof(struct igmpmsg); 2885 mb_first->m_len = sizeof(struct igmpmsg); 2886 mb_first->m_next = mb_copy; 2887 2888 /* Send message to routing daemon */ 2889 im = mtod(mb_first, struct igmpmsg *); 2890 im->im_msgtype = IGMPMSG_WHOLEPKT; 2891 im->im_mbz = 0; 2892 im->im_vif = vifp - viftable; 2893 im->im_src = ip->ip_src; 2894 im->im_dst = ip->ip_dst; 2895 2896 k_igmpsrc.sin_addr = ip->ip_src; 2897 2898 mrtstat.mrts_upcalls++; 2899 2900 if (socket_send(ip_mrouter, mb_first, &k_igmpsrc) < 0) { 2901 if (mrtdebug & DEBUG_PIM) 2902 log(LOG_WARNING, 2903 "mcast: pim_register_send_upcall: ip_mrouter socket queue full"); 2904 ++mrtstat.mrts_upq_sockfull; 2905 return ENOBUFS; 2906 } 2907 2908 /* Keep statistics */ 2909 pimstat.pims_snd_registers_msgs++; 2910 pimstat.pims_snd_registers_bytes += len; 2911 2912 return 0; 2913 } 2914 2915 /* 2916 * Encapsulate the data packet in PIM Register message and send it to the RP. 2917 */ 2918 static int 2919 pim_register_send_rp(struct ip *ip, struct vif *vifp, 2920 struct mbuf *mb_copy, struct mfc *rt) 2921 { 2922 struct mbuf *mb_first; 2923 struct ip *ip_outer; 2924 struct pim_encap_pimhdr *pimhdr; 2925 int len = ntohs(ip->ip_len); 2926 vifi_t vifi = rt->mfc_parent; 2927 2928 if ((vifi >= numvifs) || (viftable[vifi].v_lcl_addr.s_addr == 0)) { 2929 m_freem(mb_copy); 2930 return EADDRNOTAVAIL; /* The iif vif is invalid */ 2931 } 2932 2933 /* 2934 * Add a new mbuf with the encapsulating header 2935 */ 2936 MGETHDR(mb_first, M_DONTWAIT, MT_HEADER); 2937 if (mb_first == NULL) { 2938 m_freem(mb_copy); 2939 return ENOBUFS; 2940 } 2941 mb_first->m_data += max_linkhdr; 2942 mb_first->m_len = sizeof(pim_encap_iphdr) + sizeof(pim_encap_pimhdr); 2943 mb_first->m_next = mb_copy; 2944 2945 mb_first->m_pkthdr.len = len + mb_first->m_len; 2946 2947 /* 2948 * Fill in the encapsulating IP and PIM header 2949 */ 2950 ip_outer = mtod(mb_first, struct ip *); 2951 *ip_outer = pim_encap_iphdr; 2952 #ifdef RANDOM_IP_ID 2953 ip_outer->ip_id = ip_randomid(); 2954 #else 2955 ip_outer->ip_id = htons(ip_id++); 2956 #endif 2957 ip_outer->ip_len = len + sizeof(pim_encap_iphdr) + sizeof(pim_encap_pimhdr); 2958 ip_outer->ip_src = viftable[vifi].v_lcl_addr; 2959 ip_outer->ip_dst = rt->mfc_rp; 2960 /* 2961 * Copy the inner header TOS to the outer header, and take care of the 2962 * IP_DF bit. 2963 */ 2964 ip_outer->ip_tos = ip->ip_tos; 2965 if (ntohs(ip->ip_off) & IP_DF) 2966 ip_outer->ip_off |= IP_DF; 2967 pimhdr = (struct pim_encap_pimhdr *)((caddr_t)ip_outer 2968 + sizeof(pim_encap_iphdr)); 2969 *pimhdr = pim_encap_pimhdr; 2970 /* If the iif crosses a border, set the Border-bit */ 2971 if (rt->mfc_flags[vifi] & MRT_MFC_FLAGS_BORDER_VIF & mrt_api_config) 2972 pimhdr->flags |= htonl(PIM_BORDER_REGISTER); 2973 2974 mb_first->m_data += sizeof(pim_encap_iphdr); 2975 pimhdr->pim.pim_cksum = in_cksum(mb_first, sizeof(pim_encap_pimhdr)); 2976 mb_first->m_data -= sizeof(pim_encap_iphdr); 2977 2978 if (vifp->v_rate_limit == 0) 2979 tbf_send_packet(vifp, mb_first); 2980 else 2981 tbf_control(vifp, mb_first, ip, ip_outer->ip_len); 2982 2983 /* Keep statistics */ 2984 pimstat.pims_snd_registers_msgs++; 2985 pimstat.pims_snd_registers_bytes += len; 2986 2987 return 0; 2988 } 2989 2990 /* 2991 * PIM-SMv2 and PIM-DM messages processing. 2992 * Receives and verifies the PIM control messages, and passes them 2993 * up to the listening socket, using rip_input(). 2994 * The only message with special processing is the PIM_REGISTER message 2995 * (used by PIM-SM): the PIM header is stripped off, and the inner packet 2996 * is passed to if_simloop(). 2997 */ 2998 void 2999 pim_input(struct mbuf *m, int off, int proto) 3000 { 3001 struct ip *ip = mtod(m, struct ip *); 3002 struct pim *pim; 3003 int minlen; 3004 int datalen = ip->ip_len; 3005 int ip_tos; 3006 int iphlen = off; 3007 3008 /* Keep statistics */ 3009 pimstat.pims_rcv_total_msgs++; 3010 pimstat.pims_rcv_total_bytes += datalen; 3011 3012 /* 3013 * Validate lengths 3014 */ 3015 if (datalen < PIM_MINLEN) { 3016 pimstat.pims_rcv_tooshort++; 3017 log(LOG_ERR, "pim_input: packet size too small %d from %lx\n", 3018 datalen, (u_long)ip->ip_src.s_addr); 3019 m_freem(m); 3020 return; 3021 } 3022 3023 /* 3024 * If the packet is at least as big as a REGISTER, go agead 3025 * and grab the PIM REGISTER header size, to avoid another 3026 * possible m_pullup() later. 3027 * 3028 * PIM_MINLEN == pimhdr + u_int32_t == 4 + 4 = 8 3029 * PIM_REG_MINLEN == pimhdr + reghdr + encap_iphdr == 4 + 4 + 20 = 28 3030 */ 3031 minlen = iphlen + (datalen >= PIM_REG_MINLEN ? PIM_REG_MINLEN : PIM_MINLEN); 3032 /* 3033 * Get the IP and PIM headers in contiguous memory, and 3034 * possibly the PIM REGISTER header. 3035 */ 3036 if ((m->m_flags & M_EXT || m->m_len < minlen) && 3037 (m = m_pullup(m, minlen)) == 0) { 3038 log(LOG_ERR, "pim_input: m_pullup failure\n"); 3039 return; 3040 } 3041 /* m_pullup() may have given us a new mbuf so reset ip. */ 3042 ip = mtod(m, struct ip *); 3043 ip_tos = ip->ip_tos; 3044 3045 /* adjust mbuf to point to the PIM header */ 3046 m->m_data += iphlen; 3047 m->m_len -= iphlen; 3048 pim = mtod(m, struct pim *); 3049 3050 /* 3051 * Validate checksum. If PIM REGISTER, exclude the data packet. 3052 * 3053 * XXX: some older PIMv2 implementations don't make this distinction, 3054 * so for compatibility reason perform the checksum over part of the 3055 * message, and if error, then over the whole message. 3056 */ 3057 if (PIM_VT_T(pim->pim_vt) == PIM_REGISTER && in_cksum(m, PIM_MINLEN) == 0) { 3058 /* do nothing, checksum okay */ 3059 } else if (in_cksum(m, datalen)) { 3060 pimstat.pims_rcv_badsum++; 3061 if (mrtdebug & DEBUG_PIM) 3062 log(LOG_DEBUG, "pim_input: invalid checksum"); 3063 m_freem(m); 3064 return; 3065 } 3066 3067 /* PIM version check */ 3068 if (PIM_VT_V(pim->pim_vt) < PIM_VERSION) { 3069 pimstat.pims_rcv_badversion++; 3070 log(LOG_ERR, "pim_input: incorrect version %d, expecting %d\n", 3071 PIM_VT_V(pim->pim_vt), PIM_VERSION); 3072 m_freem(m); 3073 return; 3074 } 3075 3076 /* restore mbuf back to the outer IP */ 3077 m->m_data -= iphlen; 3078 m->m_len += iphlen; 3079 3080 if (PIM_VT_T(pim->pim_vt) == PIM_REGISTER) { 3081 /* 3082 * Since this is a REGISTER, we'll make a copy of the register 3083 * headers ip + pim + u_int32 + encap_ip, to be passed up to the 3084 * routing daemon. 3085 */ 3086 struct sockaddr_in dst = { sizeof(dst), AF_INET }; 3087 struct mbuf *mcp; 3088 struct ip *encap_ip; 3089 u_int32_t *reghdr; 3090 3091 if ((reg_vif_num >= numvifs) || (reg_vif_num == VIFI_INVALID)) { 3092 if (mrtdebug & DEBUG_PIM) 3093 log(LOG_DEBUG, 3094 "pim_input: register vif not set: %d\n", reg_vif_num); 3095 m_freem(m); 3096 return; 3097 } 3098 3099 /* 3100 * Validate length 3101 */ 3102 if (datalen < PIM_REG_MINLEN) { 3103 pimstat.pims_rcv_tooshort++; 3104 pimstat.pims_rcv_badregisters++; 3105 log(LOG_ERR, 3106 "pim_input: register packet size too small %d from %lx\n", 3107 datalen, (u_long)ip->ip_src.s_addr); 3108 m_freem(m); 3109 return; 3110 } 3111 3112 reghdr = (u_int32_t *)(pim + 1); 3113 encap_ip = (struct ip *)(reghdr + 1); 3114 3115 if (mrtdebug & DEBUG_PIM) { 3116 log(LOG_DEBUG, 3117 "pim_input[register], encap_ip: %lx -> %lx, encap_ip len %d\n", 3118 (u_long)ntohl(encap_ip->ip_src.s_addr), 3119 (u_long)ntohl(encap_ip->ip_dst.s_addr), 3120 ntohs(encap_ip->ip_len)); 3121 } 3122 3123 /* verify the version number of the inner packet */ 3124 if (encap_ip->ip_v != IPVERSION) { 3125 pimstat.pims_rcv_badregisters++; 3126 if (mrtdebug & DEBUG_PIM) { 3127 log(LOG_DEBUG, "pim_input: invalid IP version (%d) " 3128 "of the inner packet\n", encap_ip->ip_v); 3129 } 3130 m_freem(m); 3131 return; 3132 } 3133 3134 /* verify the inner packet is destined to a mcast group */ 3135 if (!IN_MULTICAST(ntohl(encap_ip->ip_dst.s_addr))) { 3136 pimstat.pims_rcv_badregisters++; 3137 if (mrtdebug & DEBUG_PIM) 3138 log(LOG_DEBUG, 3139 "pim_input: inner packet of register is not " 3140 "multicast %lx\n", 3141 (u_long)ntohl(encap_ip->ip_dst.s_addr)); 3142 m_freem(m); 3143 return; 3144 } 3145 3146 /* 3147 * Copy the TOS from the outer IP header to the inner IP header. 3148 */ 3149 if (encap_ip->ip_tos != ip_tos) { 3150 /* Outer TOS -> inner TOS */ 3151 encap_ip->ip_tos = ip_tos; 3152 /* Recompute the inner header checksum. Sigh... */ 3153 3154 /* adjust mbuf to point to the inner IP header */ 3155 m->m_data += (iphlen + PIM_MINLEN); 3156 m->m_len -= (iphlen + PIM_MINLEN); 3157 3158 encap_ip->ip_sum = 0; 3159 encap_ip->ip_sum = in_cksum(m, encap_ip->ip_hl << 2); 3160 3161 /* restore mbuf to point back to the outer IP header */ 3162 m->m_data -= (iphlen + PIM_MINLEN); 3163 m->m_len += (iphlen + PIM_MINLEN); 3164 } 3165 3166 /* If a NULL_REGISTER, pass it to the daemon */ 3167 if ((ntohl(*reghdr) & PIM_NULL_REGISTER)) 3168 goto pim_input_to_daemon; 3169 3170 /* 3171 * Decapsulate the inner IP packet and loopback to forward it 3172 * as a normal multicast packet. Also, make a copy of the 3173 * outer_iphdr + pimhdr + reghdr + encap_iphdr 3174 * to pass to the daemon later, so it can take the appropriate 3175 * actions (e.g., send back PIM_REGISTER_STOP). 3176 * XXX: here m->m_data points to the outer IP header. 3177 */ 3178 mcp = m_copy(m, 0, iphlen + PIM_REG_MINLEN); 3179 if (mcp == NULL) { 3180 log(LOG_ERR, 3181 "pim_input: pim register: could not copy register head\n"); 3182 m_freem(m); 3183 return; 3184 } 3185 3186 /* Keep statistics */ 3187 /* XXX: registers_bytes include only the encap. mcast pkt */ 3188 pimstat.pims_rcv_registers_msgs++; 3189 pimstat.pims_rcv_registers_bytes += ntohs(encap_ip->ip_len); 3190 3191 /* 3192 * forward the inner ip packet; point m_data at the inner ip. 3193 */ 3194 m_adj(m, iphlen + PIM_MINLEN); 3195 3196 if (mrtdebug & DEBUG_PIM) { 3197 log(LOG_DEBUG, 3198 "pim_input: forwarding decapsulated register: " 3199 "src %lx, dst %lx, vif %d\n", 3200 (u_long)ntohl(encap_ip->ip_src.s_addr), 3201 (u_long)ntohl(encap_ip->ip_dst.s_addr), 3202 reg_vif_num); 3203 } 3204 if_simloop(viftable[reg_vif_num].v_ifp, m, dst.sin_family, 0); 3205 3206 /* prepare the register head to send to the mrouting daemon */ 3207 m = mcp; 3208 } 3209 3210 pim_input_to_daemon: 3211 /* 3212 * Pass the PIM message up to the daemon; if it is a Register message, 3213 * pass the 'head' only up to the daemon. This includes the 3214 * outer IP header, PIM header, PIM-Register header and the 3215 * inner IP header. 3216 * XXX: the outer IP header pkt size of a Register is not adjust to 3217 * reflect the fact that the inner multicast data is truncated. 3218 */ 3219 rip_input(m, iphlen, proto); 3220 3221 return; 3222 } 3223 #endif /* PIM */ 3224 3225 static int 3226 ip_mroute_modevent(module_t mod, int type, void *unused) 3227 { 3228 int s; 3229 3230 switch (type) { 3231 case MOD_LOAD: 3232 s = splnet(); 3233 /* XXX Protect against multiple loading */ 3234 ip_mcast_src = X_ip_mcast_src; 3235 ip_mforward = X_ip_mforward; 3236 ip_mrouter_done = X_ip_mrouter_done; 3237 ip_mrouter_get = X_ip_mrouter_get; 3238 ip_mrouter_set = X_ip_mrouter_set; 3239 ip_rsvp_force_done = X_ip_rsvp_force_done; 3240 ip_rsvp_vif = X_ip_rsvp_vif; 3241 ipip_input = X_ipip_input; 3242 legal_vif_num = X_legal_vif_num; 3243 mrt_ioctl = X_mrt_ioctl; 3244 rsvp_input_p = X_rsvp_input; 3245 splx(s); 3246 break; 3247 3248 case MOD_UNLOAD: 3249 if (ip_mrouter) 3250 return EINVAL; 3251 3252 s = splnet(); 3253 ip_mcast_src = NULL; 3254 ip_mforward = NULL; 3255 ip_mrouter_done = NULL; 3256 ip_mrouter_get = NULL; 3257 ip_mrouter_set = NULL; 3258 ip_rsvp_force_done = NULL; 3259 ip_rsvp_vif = NULL; 3260 ipip_input = NULL; 3261 legal_vif_num = NULL; 3262 mrt_ioctl = NULL; 3263 rsvp_input_p = NULL; 3264 splx(s); 3265 break; 3266 } 3267 return 0; 3268 } 3269 3270 static moduledata_t ip_mroutemod = { 3271 "ip_mroute", 3272 ip_mroute_modevent, 3273 0 3274 }; 3275 DECLARE_MODULE(ip_mroute, ip_mroutemod, SI_SUB_PSEUDO, SI_ORDER_ANY);

Cache object: 5ac39af5ef28cad5961593107b4cfbe9

FreeBSD/Linux Kernel Cross Reference sys/netinet/ip_mroute.c

FreeBSD/Linux Kernel Cross Reference
sys/netinet/ip_mroute.c