1 /*
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
34 * $FreeBSD: releng/5.2/sys/netinet/ip_output.c 124284 2004-01-09 12:18:17Z andre $
35 */
36
37 #include "opt_ipfw.h"
38 #include "opt_ipdn.h"
39 #include "opt_ipdivert.h"
40 #include "opt_ipfilter.h"
41 #include "opt_ipsec.h"
42 #include "opt_mac.h"
43 #include "opt_pfil_hooks.h"
44 #include "opt_random_ip_id.h"
45 #include "opt_mbuf_stress_test.h"
46
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/kernel.h>
50 #include <sys/mac.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/protosw.h>
54 #include <sys/socket.h>
55 #include <sys/socketvar.h>
56 #include <sys/sysctl.h>
57
58 #include <net/if.h>
59 #include <net/route.h>
60
61 #include <netinet/in.h>
62 #include <netinet/in_systm.h>
63 #include <netinet/ip.h>
64 #include <netinet/in_pcb.h>
65 #include <netinet/in_var.h>
66 #include <netinet/ip_var.h>
67
68 #ifdef PFIL_HOOKS
69 #include <net/pfil.h>
70 #endif
71
72 #include <machine/in_cksum.h>
73
/*
 * File-local malloc(9) type named "ip_moptions"; per its description string
 * it tags allocations of internet multicast option state.
 */
static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options");
75
76 #ifdef IPSEC
77 #include <netinet6/ipsec.h>
78 #include <netkey/key.h>
79 #ifdef IPSEC_DEBUG
80 #include <netkey/key_debug.h>
81 #else
82 #define KEYDEBUG(lev,arg)
83 #endif
84 #endif /*IPSEC*/
85
86 #ifdef FAST_IPSEC
87 #include <netipsec/ipsec.h>
88 #include <netipsec/xform.h>
89 #include <netipsec/key.h>
90 #endif /*FAST_IPSEC*/
91
92 #include <netinet/ip_fw.h>
93 #include <netinet/ip_dummynet.h>
94
/*
 * Debug helper: print prefix string x, the IPv4 address a (a struct with a
 * network-byte-order s_addr member, e.g. struct in_addr) in dotted-quad
 * form, then suffix string y.
 *
 * The expansion is a single printf() expression with no trailing semicolon,
 * so the macro is safe in if/else bodies and yields printf()'s return value.
 * Every argument is parenthesized; note that `a' is evaluated four times,
 * so it must not have side effects.
 */
#define print_ip(x, a, y)						\
	printf("%s %d.%d.%d.%d%s", (x),					\
	    (int)((ntohl((a).s_addr) >> 24) & 0xFF),			\
	    (int)((ntohl((a).s_addr) >> 16) & 0xFF),			\
	    (int)((ntohl((a).s_addr) >> 8) & 0xFF),			\
	    (int)(ntohl((a).s_addr) & 0xFF), (y))
100
101 u_short ip_id;
102
103 #ifdef MBUF_STRESS_TEST
104 int mbuf_frag_size = 0;
105 SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW,
106 &mbuf_frag_size, 0, "Fragment outgoing mbufs to this size");
107 #endif
108
109 static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
110 static struct ifnet *ip_multicast_if(struct in_addr *, int *);
111 static void ip_mloopback
112 (struct ifnet *, struct mbuf *, struct sockaddr_in *, int);
113 static int ip_getmoptions
114 (struct sockopt *, struct ip_moptions *);
115 static int ip_pcbopts(int, struct mbuf **, struct mbuf *);
116 static int ip_setmoptions
117 (struct sockopt *, struct ip_moptions **);
118
119 int ip_optcopy(struct ip *, struct ip *);
120
121
122 extern struct protosw inetsw[];
123
124 /*
125 * IP output. The packet in mbuf chain m contains a skeletal IP
126 * header (with len, off, ttl, proto, tos, src, dst).
127 * The mbuf chain containing the packet will be freed.
128 * The mbuf opt, if present, will not be freed.
129 * In the IP forwarding case, the packet will arrive with options already
130 * inserted, so must have a NULL opt pointer.
131 */
132 int
133 ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro,
134 int flags, struct ip_moptions *imo, struct inpcb *inp)
135 {
136 struct ip *ip;
137 struct ifnet *ifp = NULL; /* keep compiler happy */
138 struct mbuf *m;
139 int hlen = sizeof (struct ip);
140 int len, off, error = 0;
141 struct sockaddr_in *dst = NULL; /* keep compiler happy */
142 struct in_ifaddr *ia = NULL;
143 int isbroadcast, sw_csum;
144 struct in_addr pkt_dst;
145 struct route iproute;
146 #ifdef IPSEC
147 struct socket *so;
148 struct secpolicy *sp = NULL;
149 #endif
150 #ifdef FAST_IPSEC
151 struct m_tag *mtag;
152 struct secpolicy *sp = NULL;
153 struct tdb_ident *tdbi;
154 int s;
155 #endif /* FAST_IPSEC */
156 struct ip_fw_args args;
157 int src_was_INADDR_ANY = 0; /* as the name says... */
158
159 args.eh = NULL;
160 args.rule = NULL;
161 args.next_hop = NULL;
162 args.divert_rule = 0; /* divert cookie */
163
164 /* Grab info from MT_TAG mbufs prepended to the chain. */
165 for (; m0 && m0->m_type == MT_TAG; m0 = m0->m_next) {
166 switch(m0->_m_tag_id) {
167 default:
168 printf("ip_output: unrecognised MT_TAG tag %d\n",
169 m0->_m_tag_id);
170 break;
171
172 case PACKET_TAG_DUMMYNET:
173 /*
174 * the packet was already tagged, so part of the
175 * processing was already done, and we need to go down.
176 * Get parameters from the header.
177 */
178 args.rule = ((struct dn_pkt *)m0)->rule;
179 opt = NULL ;
180 ro = & ( ((struct dn_pkt *)m0)->ro ) ;
181 imo = NULL ;
182 dst = ((struct dn_pkt *)m0)->dn_dst ;
183 ifp = ((struct dn_pkt *)m0)->ifp ;
184 flags = ((struct dn_pkt *)m0)->flags ;
185 break;
186
187 case PACKET_TAG_DIVERT:
188 args.divert_rule = (intptr_t)m0->m_data & 0xffff;
189 break;
190
191 case PACKET_TAG_IPFORWARD:
192 args.next_hop = (struct sockaddr_in *)m0->m_data;
193 break;
194 }
195 }
196 m = m0;
197
198 #ifdef IPSEC
199 so = ipsec_getsocket(m);
200 (void)ipsec_setsocket(m, NULL);
201 #endif /*IPSEC*/
202
203 M_ASSERTPKTHDR(m);
204
205 if (ro == NULL) {
206 ro = &iproute;
207 bzero(ro, sizeof (*ro));
208 }
209
210 if (inp != NULL)
211 INP_LOCK_ASSERT(inp);
212
213 if (args.rule != NULL) { /* dummynet already saw us */
214 ip = mtod(m, struct ip *);
215 hlen = ip->ip_hl << 2 ;
216 if (ro->ro_rt)
217 ia = ifatoia(ro->ro_rt->rt_ifa);
218 goto sendit;
219 }
220
221 if (opt) {
222 len = 0;
223 m = ip_insertoptions(m, opt, &len);
224 if (len != 0)
225 hlen = len;
226 }
227 ip = mtod(m, struct ip *);
228 pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst;
229
230 /*
231 * Fill in IP header. If we are not allowing fragmentation,
232 * then the ip_id field is meaningless, but we don't set it
233 * to zero. Doing so causes various problems when devices along
234 * the path (routers, load balancers, firewalls, etc.) illegally
235 * disable DF on our packet. Note that a 16-bit counter
236 * will wrap around in less than 10 seconds at 100 Mbit/s on a
237 * medium with MTU 1500. See Steven M. Bellovin, "A Technique
238 * for Counting NATted Hosts", Proc. IMW'02, available at
239 * <http://www.research.att.com/~smb/papers/fnat.pdf>.
240 */
241 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
242 ip->ip_v = IPVERSION;
243 ip->ip_hl = hlen >> 2;
244 #ifdef RANDOM_IP_ID
245 ip->ip_id = ip_randomid();
246 #else
247 ip->ip_id = htons(ip_id++);
248 #endif
249 ipstat.ips_localout++;
250 } else {
251 hlen = ip->ip_hl << 2;
252 }
253
254 dst = (struct sockaddr_in *)&ro->ro_dst;
255 /*
256 * If there is a cached route,
257 * check that it is to the same destination
258 * and is still up. If not, free it and try again.
259 * The address family should also be checked in case of sharing the
260 * cache with IPv6.
261 */
262 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
263 dst->sin_family != AF_INET ||
264 dst->sin_addr.s_addr != pkt_dst.s_addr)) {
265 RTFREE(ro->ro_rt);
266 ro->ro_rt = (struct rtentry *)0;
267 }
268 if (ro->ro_rt == 0) {
269 bzero(dst, sizeof(*dst));
270 dst->sin_family = AF_INET;
271 dst->sin_len = sizeof(*dst);
272 dst->sin_addr = pkt_dst;
273 }
274 /*
275 * If routing to interface only,
276 * short circuit routing lookup.
277 */
278 if (flags & IP_ROUTETOIF) {
279 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
280 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
281 ipstat.ips_noroute++;
282 error = ENETUNREACH;
283 goto bad;
284 }
285 ifp = ia->ia_ifp;
286 ip->ip_ttl = 1;
287 isbroadcast = in_broadcast(dst->sin_addr, ifp);
288 } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
289 imo != NULL && imo->imo_multicast_ifp != NULL) {
290 /*
291 * Bypass the normal routing lookup for multicast
292 * packets if the interface is specified.
293 */
294 ifp = imo->imo_multicast_ifp;
295 IFP_TO_IA(ifp, ia);
296 isbroadcast = 0; /* fool gcc */
297 } else {
298 /*
299 * We want to do any cloning requested by the link layer,
300 * as this is probably required in all cases for correct
301 * operation (as it is for ARP).
302 */
303 if (ro->ro_rt == 0)
304 rtalloc(ro);
305 if (ro->ro_rt == 0) {
306 ipstat.ips_noroute++;
307 error = EHOSTUNREACH;
308 goto bad;
309 }
310 ia = ifatoia(ro->ro_rt->rt_ifa);
311 ifp = ro->ro_rt->rt_ifp;
312 ro->ro_rt->rt_rmx.rmx_pksent++;
313 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
314 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
315 if (ro->ro_rt->rt_flags & RTF_HOST)
316 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
317 else
318 isbroadcast = in_broadcast(dst->sin_addr, ifp);
319 }
320 if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) {
321 struct in_multi *inm;
322
323 m->m_flags |= M_MCAST;
324 /*
325 * IP destination address is multicast. Make sure "dst"
326 * still points to the address in "ro". (It may have been
327 * changed to point to a gateway address, above.)
328 */
329 dst = (struct sockaddr_in *)&ro->ro_dst;
330 /*
331 * See if the caller provided any multicast options
332 */
333 if (imo != NULL) {
334 ip->ip_ttl = imo->imo_multicast_ttl;
335 if (imo->imo_multicast_vif != -1)
336 ip->ip_src.s_addr =
337 ip_mcast_src ?
338 ip_mcast_src(imo->imo_multicast_vif) :
339 INADDR_ANY;
340 } else
341 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
342 /*
343 * Confirm that the outgoing interface supports multicast.
344 */
345 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
346 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
347 ipstat.ips_noroute++;
348 error = ENETUNREACH;
349 goto bad;
350 }
351 }
352 /*
353 * If source address not specified yet, use address
354 * of outgoing interface.
355 */
356 if (ip->ip_src.s_addr == INADDR_ANY) {
357 /* Interface may have no addresses. */
358 if (ia != NULL)
359 ip->ip_src = IA_SIN(ia)->sin_addr;
360 }
361
362 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
363 /*
364 * XXX
365 * delayed checksums are not currently
366 * compatible with IP multicast routing
367 */
368 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
369 in_delayed_cksum(m);
370 m->m_pkthdr.csum_flags &=
371 ~CSUM_DELAY_DATA;
372 }
373 }
374 IN_LOOKUP_MULTI(pkt_dst, ifp, inm);
375 if (inm != NULL &&
376 (imo == NULL || imo->imo_multicast_loop)) {
377 /*
378 * If we belong to the destination multicast group
379 * on the outgoing interface, and the caller did not
380 * forbid loopback, loop back a copy.
381 */
382 ip_mloopback(ifp, m, dst, hlen);
383 }
384 else {
385 /*
386 * If we are acting as a multicast router, perform
387 * multicast forwarding as if the packet had just
388 * arrived on the interface to which we are about
389 * to send. The multicast forwarding function
390 * recursively calls this function, using the
391 * IP_FORWARDING flag to prevent infinite recursion.
392 *
393 * Multicasts that are looped back by ip_mloopback(),
394 * above, will be forwarded by the ip_input() routine,
395 * if necessary.
396 */
397 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
398 /*
399 * If rsvp daemon is not running, do not
400 * set ip_moptions. This ensures that the packet
401 * is multicast and not just sent down one link
402 * as prescribed by rsvpd.
403 */
404 if (!rsvp_on)
405 imo = NULL;
406 if (ip_mforward &&
407 ip_mforward(ip, ifp, m, imo) != 0) {
408 m_freem(m);
409 goto done;
410 }
411 }
412 }
413
414 /*
415 * Multicasts with a time-to-live of zero may be looped-
416 * back, above, but must not be transmitted on a network.
417 * Also, multicasts addressed to the loopback interface
418 * are not sent -- the above call to ip_mloopback() will
419 * loop back a copy if this host actually belongs to the
420 * destination group on the loopback interface.
421 */
422 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
423 m_freem(m);
424 goto done;
425 }
426
427 goto sendit;
428 }
429 #ifndef notdef
430 /*
431 * If the source address is not specified yet, use the address
432 * of the outoing interface. In case, keep note we did that, so
433 * if the the firewall changes the next-hop causing the output
434 * interface to change, we can fix that.
435 */
436 if (ip->ip_src.s_addr == INADDR_ANY) {
437 /* Interface may have no addresses. */
438 if (ia != NULL) {
439 ip->ip_src = IA_SIN(ia)->sin_addr;
440 src_was_INADDR_ANY = 1;
441 }
442 }
443 #endif /* notdef */
444 /*
445 * Verify that we have any chance at all of being able to queue
446 * the packet or packet fragments
447 */
448 if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
449 ifp->if_snd.ifq_maxlen) {
450 error = ENOBUFS;
451 ipstat.ips_odropped++;
452 goto bad;
453 }
454
455 /*
456 * Look for broadcast address and
457 * verify user is allowed to send
458 * such a packet.
459 */
460 if (isbroadcast) {
461 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
462 error = EADDRNOTAVAIL;
463 goto bad;
464 }
465 if ((flags & IP_ALLOWBROADCAST) == 0) {
466 error = EACCES;
467 goto bad;
468 }
469 /* don't allow broadcast messages to be fragmented */
470 if (ip->ip_len > ifp->if_mtu) {
471 error = EMSGSIZE;
472 goto bad;
473 }
474 if (flags & IP_SENDONES)
475 ip->ip_dst.s_addr = INADDR_BROADCAST;
476 m->m_flags |= M_BCAST;
477 } else {
478 m->m_flags &= ~M_BCAST;
479 }
480
481 sendit:
482 #ifdef IPSEC
483 /* get SP for this packet */
484 if (so == NULL)
485 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND,
486 flags, &error);
487 else
488 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
489
490 if (sp == NULL) {
491 ipsecstat.out_inval++;
492 goto bad;
493 }
494
495 error = 0;
496
497 /* check policy */
498 switch (sp->policy) {
499 case IPSEC_POLICY_DISCARD:
500 /*
501 * This packet is just discarded.
502 */
503 ipsecstat.out_polvio++;
504 goto bad;
505
506 case IPSEC_POLICY_BYPASS:
507 case IPSEC_POLICY_NONE:
508 /* no need to do IPsec. */
509 goto skip_ipsec;
510
511 case IPSEC_POLICY_IPSEC:
512 if (sp->req == NULL) {
513 /* acquire a policy */
514 error = key_spdacquire(sp);
515 goto bad;
516 }
517 break;
518
519 case IPSEC_POLICY_ENTRUST:
520 default:
521 printf("ip_output: Invalid policy found. %d\n", sp->policy);
522 }
523 {
524 struct ipsec_output_state state;
525 bzero(&state, sizeof(state));
526 state.m = m;
527 if (flags & IP_ROUTETOIF) {
528 state.ro = &iproute;
529 bzero(&iproute, sizeof(iproute));
530 } else
531 state.ro = ro;
532 state.dst = (struct sockaddr *)dst;
533
534 ip->ip_sum = 0;
535
536 /*
537 * XXX
538 * delayed checksums are not currently compatible with IPsec
539 */
540 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
541 in_delayed_cksum(m);
542 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
543 }
544
545 ip->ip_len = htons(ip->ip_len);
546 ip->ip_off = htons(ip->ip_off);
547
548 error = ipsec4_output(&state, sp, flags);
549
550 m = state.m;
551 if (flags & IP_ROUTETOIF) {
552 /*
553 * if we have tunnel mode SA, we may need to ignore
554 * IP_ROUTETOIF.
555 */
556 if (state.ro != &iproute || state.ro->ro_rt != NULL) {
557 flags &= ~IP_ROUTETOIF;
558 ro = state.ro;
559 }
560 } else
561 ro = state.ro;
562 dst = (struct sockaddr_in *)state.dst;
563 if (error) {
564 /* mbuf is already reclaimed in ipsec4_output. */
565 m0 = NULL;
566 switch (error) {
567 case EHOSTUNREACH:
568 case ENETUNREACH:
569 case EMSGSIZE:
570 case ENOBUFS:
571 case ENOMEM:
572 break;
573 default:
574 printf("ip4_output (ipsec): error code %d\n", error);
575 /*fall through*/
576 case ENOENT:
577 /* don't show these error codes to the user */
578 error = 0;
579 break;
580 }
581 goto bad;
582 }
583 }
584
585 /* be sure to update variables that are affected by ipsec4_output() */
586 ip = mtod(m, struct ip *);
587 hlen = ip->ip_hl << 2;
588 if (ro->ro_rt == NULL) {
589 if ((flags & IP_ROUTETOIF) == 0) {
590 printf("ip_output: "
591 "can't update route after IPsec processing\n");
592 error = EHOSTUNREACH; /*XXX*/
593 goto bad;
594 }
595 } else {
596 ia = ifatoia(ro->ro_rt->rt_ifa);
597 ifp = ro->ro_rt->rt_ifp;
598 }
599
600 /* make it flipped, again. */
601 ip->ip_len = ntohs(ip->ip_len);
602 ip->ip_off = ntohs(ip->ip_off);
603 skip_ipsec:
604 #endif /*IPSEC*/
605 #ifdef FAST_IPSEC
606 /*
607 * Check the security policy (SP) for the packet and, if
608 * required, do IPsec-related processing. There are two
609 * cases here; the first time a packet is sent through
610 * it will be untagged and handled by ipsec4_checkpolicy.
611 * If the packet is resubmitted to ip_output (e.g. after
612 * AH, ESP, etc. processing), there will be a tag to bypass
613 * the lookup and related policy checking.
614 */
615 mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL);
616 s = splnet();
617 if (mtag != NULL) {
618 tdbi = (struct tdb_ident *)(mtag + 1);
619 sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND);
620 if (sp == NULL)
621 error = -EINVAL; /* force silent drop */
622 m_tag_delete(m, mtag);
623 } else {
624 sp = ipsec4_checkpolicy(m, IPSEC_DIR_OUTBOUND, flags,
625 &error, inp);
626 }
627 /*
628 * There are four return cases:
629 * sp != NULL apply IPsec policy
630 * sp == NULL, error == 0 no IPsec handling needed
631 * sp == NULL, error == -EINVAL discard packet w/o error
632 * sp == NULL, error != 0 discard packet, report error
633 */
634 if (sp != NULL) {
635 /* Loop detection, check if ipsec processing already done */
636 KASSERT(sp->req != NULL, ("ip_output: no ipsec request"));
637 for (mtag = m_tag_first(m); mtag != NULL;
638 mtag = m_tag_next(m, mtag)) {
639 if (mtag->m_tag_cookie != MTAG_ABI_COMPAT)
640 continue;
641 if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE &&
642 mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED)
643 continue;
644 /*
645 * Check if policy has an SA associated with it.
646 * This can happen when an SP has yet to acquire
647 * an SA; e.g. on first reference. If it occurs,
648 * then we let ipsec4_process_packet do its thing.
649 */
650 if (sp->req->sav == NULL)
651 break;
652 tdbi = (struct tdb_ident *)(mtag + 1);
653 if (tdbi->spi == sp->req->sav->spi &&
654 tdbi->proto == sp->req->sav->sah->saidx.proto &&
655 bcmp(&tdbi->dst, &sp->req->sav->sah->saidx.dst,
656 sizeof (union sockaddr_union)) == 0) {
657 /*
658 * No IPsec processing is needed, free
659 * reference to SP.
660 *
661 * NB: null pointer to avoid free at
662 * done: below.
663 */
664 KEY_FREESP(&sp), sp = NULL;
665 splx(s);
666 goto spd_done;
667 }
668 }
669
670 /*
671 * Do delayed checksums now because we send before
672 * this is done in the normal processing path.
673 */
674 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
675 in_delayed_cksum(m);
676 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
677 }
678
679 ip->ip_len = htons(ip->ip_len);
680 ip->ip_off = htons(ip->ip_off);
681
682 /* NB: callee frees mbuf */
683 error = ipsec4_process_packet(m, sp->req, flags, 0);
684 /*
685 * Preserve KAME behaviour: ENOENT can be returned
686 * when an SA acquire is in progress. Don't propagate
687 * this to user-level; it confuses applications.
688 *
689 * XXX this will go away when the SADB is redone.
690 */
691 if (error == ENOENT)
692 error = 0;
693 splx(s);
694 goto done;
695 } else {
696 splx(s);
697
698 if (error != 0) {
699 /*
700 * Hack: -EINVAL is used to signal that a packet
701 * should be silently discarded. This is typically
702 * because we asked key management for an SA and
703 * it was delayed (e.g. kicked up to IKE).
704 */
705 if (error == -EINVAL)
706 error = 0;
707 goto bad;
708 } else {
709 /* No IPsec processing for this packet. */
710 }
711 #ifdef notyet
712 /*
713 * If deferred crypto processing is needed, check that
714 * the interface supports it.
715 */
716 mtag = m_tag_find(m, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL);
717 if (mtag != NULL && (ifp->if_capenable & IFCAP_IPSEC) == 0) {
718 /* notify IPsec to do its own crypto */
719 ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
720 error = EHOSTUNREACH;
721 goto bad;
722 }
723 #endif
724 }
725 spd_done:
726 #endif /* FAST_IPSEC */
727
728 /*
729 * IpHack's section.
730 * - Xlate: translate packet's addr/port (NAT).
731 * - Firewall: deny/allow/etc.
732 * - Wrap: fake packet's addr/port <unimpl.>
733 * - Encapsulate: put it in another IP and send out. <unimp.>
734 */
735 #ifdef PFIL_HOOKS
736 /*
737 * Run through list of hooks for output packets.
738 */
739 error = pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT);
740 if (error != 0 || m == NULL)
741 goto done;
742 ip = mtod(m, struct ip *);
743 #endif /* PFIL_HOOKS */
744
745 /*
746 * Check with the firewall...
747 * but not if we are already being fwd'd from a firewall.
748 */
749 if (fw_enable && IPFW_LOADED && !args.next_hop) {
750 struct sockaddr_in *old = dst;
751
752 args.m = m;
753 args.next_hop = dst;
754 args.oif = ifp;
755 off = ip_fw_chk_ptr(&args);
756 m = args.m;
757 dst = args.next_hop;
758
759 /*
760 * On return we must do the following:
761 * m == NULL -> drop the pkt (old interface, deprecated)
762 * (off & IP_FW_PORT_DENY_FLAG) -> drop the pkt (new interface)
763 * 1<=off<= 0xffff -> DIVERT
764 * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe
765 * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet
766 * dst != old -> IPFIREWALL_FORWARD
767 * off==0, dst==old -> accept
768 * If some of the above modules are not compiled in, then
769 * we should't have to check the corresponding condition
770 * (because the ipfw control socket should not accept
771 * unsupported rules), but better play safe and drop
772 * packets in case of doubt.
773 */
774 if ( (off & IP_FW_PORT_DENY_FLAG) || m == NULL) {
775 if (m)
776 m_freem(m);
777 error = EACCES;
778 goto done;
779 }
780 ip = mtod(m, struct ip *);
781 if (off == 0 && dst == old) /* common case */
782 goto pass;
783 if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) {
784 /*
785 * pass the pkt to dummynet. Need to include
786 * pipe number, m, ifp, ro, dst because these are
787 * not recomputed in the next pass.
788 * All other parameters have been already used and
789 * so they are not needed anymore.
790 * XXX note: if the ifp or ro entry are deleted
791 * while a pkt is in dummynet, we are in trouble!
792 */
793 args.ro = ro;
794 args.dst = dst;
795 args.flags = flags;
796
797 error = ip_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT,
798 &args);
799 goto done;
800 }
801 #ifdef IPDIVERT
802 if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) {
803 struct mbuf *clone = NULL;
804
805 /* Clone packet if we're doing a 'tee' */
806 if ((off & IP_FW_PORT_TEE_FLAG) != 0)
807 clone = m_dup(m, M_DONTWAIT);
808
809 /*
810 * XXX
811 * delayed checksums are not currently compatible
812 * with divert sockets.
813 */
814 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
815 in_delayed_cksum(m);
816 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
817 }
818
819 /* Restore packet header fields to original values */
820 ip->ip_len = htons(ip->ip_len);
821 ip->ip_off = htons(ip->ip_off);
822
823 /* Deliver packet to divert input routine */
824 divert_packet(m, 0, off & 0xffff, args.divert_rule);
825
826 /* If 'tee', continue with original packet */
827 if (clone != NULL) {
828 m = clone;
829 ip = mtod(m, struct ip *);
830 goto pass;
831 }
832 goto done;
833 }
834 #endif
835
836 /* IPFIREWALL_FORWARD */
837 /*
838 * Check dst to make sure it is directly reachable on the
839 * interface we previously thought it was.
840 * If it isn't (which may be likely in some situations) we have
841 * to re-route it (ie, find a route for the next-hop and the
842 * associated interface) and set them here. This is nested
843 * forwarding which in most cases is undesirable, except where
844 * such control is nigh impossible. So we do it here.
845 * And I'm babbling.
846 */
847 if (off == 0 && old != dst) { /* FORWARD, dst has changed */
848 #if 0
849 /*
850 * XXX To improve readability, this block should be
851 * changed into a function call as below:
852 */
853 error = ip_ipforward(&m, &dst, &ifp);
854 if (error)
855 goto bad;
856 if (m == NULL) /* ip_input consumed the mbuf */
857 goto done;
858 #else
859 struct in_ifaddr *ia;
860
861 /*
862 * XXX sro_fwd below is static, and a pointer
863 * to it gets passed to routines downstream.
864 * This could have surprisingly bad results in
865 * practice, because its content is overwritten
866 * by subsequent packets.
867 */
868 /* There must be a better way to do this next line... */
869 static struct route sro_fwd;
870 struct route *ro_fwd = &sro_fwd;
871
872 #if 0
873 print_ip("IPFIREWALL_FORWARD: New dst ip: ",
874 dst->sin_addr, "\n");
875 #endif
876
877 /*
878 * We need to figure out if we have been forwarded
879 * to a local socket. If so, then we should somehow
880 * "loop back" to ip_input, and get directed to the
881 * PCB as if we had received this packet. This is
882 * because it may be dificult to identify the packets
883 * you want to forward until they are being output
884 * and have selected an interface. (e.g. locally
885 * initiated packets) If we used the loopback inteface,
886 * we would not be able to control what happens
887 * as the packet runs through ip_input() as
888 * it is done through an ISR.
889 */
890 LIST_FOREACH(ia,
891 INADDR_HASH(dst->sin_addr.s_addr), ia_hash) {
892 /*
893 * If the addr to forward to is one
894 * of ours, we pretend to
895 * be the destination for this packet.
896 */
897 if (IA_SIN(ia)->sin_addr.s_addr ==
898 dst->sin_addr.s_addr)
899 break;
900 }
901 if (ia) { /* tell ip_input "dont filter" */
902 struct m_hdr tag;
903
904 tag.mh_type = MT_TAG;
905 tag.mh_flags = PACKET_TAG_IPFORWARD;
906 tag.mh_data = (caddr_t)args.next_hop;
907 tag.mh_next = m;
908 tag.mh_nextpkt = NULL;
909
910 if (m->m_pkthdr.rcvif == NULL)
911 m->m_pkthdr.rcvif = ifunit("lo0");
912 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
913 m->m_pkthdr.csum_flags |=
914 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
915 m0->m_pkthdr.csum_data = 0xffff;
916 }
917 m->m_pkthdr.csum_flags |=
918 CSUM_IP_CHECKED | CSUM_IP_VALID;
919 ip->ip_len = htons(ip->ip_len);
920 ip->ip_off = htons(ip->ip_off);
921 ip_input((struct mbuf *)&tag);
922 goto done;
923 }
924 /*
925 * Some of the logic for this was
926 * nicked from above.
927 */
928 bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst));
929
930 ro_fwd->ro_rt = 0;
931 rtalloc_ign(ro_fwd, RTF_CLONING);
932
933 if (ro_fwd->ro_rt == 0) {
934 ipstat.ips_noroute++;
935 error = EHOSTUNREACH;
936 goto bad;
937 }
938
939 ia = ifatoia(ro_fwd->ro_rt->rt_ifa);
940 ifp = ro_fwd->ro_rt->rt_ifp;
941 ro_fwd->ro_rt->rt_rmx.rmx_pksent++;
942 if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
943 dst = (struct sockaddr_in *)
944 ro_fwd->ro_rt->rt_gateway;
945 if (ro_fwd->ro_rt->rt_flags & RTF_HOST)
946 isbroadcast =
947 (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST);
948 else
949 isbroadcast = in_broadcast(dst->sin_addr, ifp);
950 if (ro->ro_rt)
951 RTFREE(ro->ro_rt);
952 ro->ro_rt = ro_fwd->ro_rt;
953 dst = (struct sockaddr_in *)&ro_fwd->ro_dst;
954
955 #endif /* ... block to be put into a function */
956 /*
957 * If we added a default src ip earlier,
958 * which would have been gotten from the-then
959 * interface, do it again, from the new one.
960 */
961 if (src_was_INADDR_ANY)
962 ip->ip_src = IA_SIN(ia)->sin_addr;
963 goto pass ;
964 }
965
966 /*
967 * if we get here, none of the above matches, and
968 * we have to drop the pkt
969 */
970 m_freem(m);
971 error = EACCES; /* not sure this is the right error msg */
972 goto done;
973 }
974
975 pass:
976 /* 127/8 must not appear on wire - RFC1122. */
977 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
978 (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
979 if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
980 ipstat.ips_badaddr++;
981 error = EADDRNOTAVAIL;
982 goto bad;
983 }
984 }
985
986 m->m_pkthdr.csum_flags |= CSUM_IP;
987 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist;
988 if (sw_csum & CSUM_DELAY_DATA) {
989 in_delayed_cksum(m);
990 sw_csum &= ~CSUM_DELAY_DATA;
991 }
992 m->m_pkthdr.csum_flags &= ifp->if_hwassist;
993
994 /*
995 * If small enough for interface, or the interface will take
996 * care of the fragmentation for us, can just send directly.
997 */
998 if (ip->ip_len <= ifp->if_mtu || (ifp->if_hwassist & CSUM_FRAGMENT &&
999 ((ip->ip_off & IP_DF) == 0))) {
1000 ip->ip_len = htons(ip->ip_len);
1001 ip->ip_off = htons(ip->ip_off);
1002 ip->ip_sum = 0;
1003 if (sw_csum & CSUM_DELAY_IP)
1004 ip->ip_sum = in_cksum(m, hlen);
1005
1006 /* Record statistics for this interface address. */
1007 if (!(flags & IP_FORWARDING) && ia) {
1008 ia->ia_ifa.if_opackets++;
1009 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1010 }
1011
1012 #ifdef IPSEC
1013 /* clean ipsec history once it goes out of the node */
1014 ipsec_delaux(m);
1015 #endif
1016
1017 #ifdef MBUF_STRESS_TEST
1018 if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size)
1019 m = m_fragment(m, M_DONTWAIT, mbuf_frag_size);
1020 #endif
1021 error = (*ifp->if_output)(ifp, m,
1022 (struct sockaddr *)dst, ro->ro_rt);
1023 goto done;
1024 }
1025
1026 if (ip->ip_off & IP_DF) {
1027 error = EMSGSIZE;
1028 /*
1029 * This case can happen if the user changed the MTU
1030 * of an interface after enabling IP on it. Because
1031 * most netifs don't keep track of routes pointing to
1032 * them, there is no way for one to update all its
1033 * routes when the MTU is changed.
1034 */
1035 if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) &&
1036 (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
1037 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
1038 }
1039 ipstat.ips_cantfrag++;
1040 goto bad;
1041 }
1042
1043 /*
1044 * Too large for interface; fragment if possible. If successful,
1045 * on return, m will point to a list of packets to be sent.
1046 */
1047 error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist, sw_csum);
1048 if (error)
1049 goto bad;
1050 for (; m; m = m0) {
1051 m0 = m->m_nextpkt;
1052 m->m_nextpkt = 0;
1053 #ifdef IPSEC
1054 /* clean ipsec history once it goes out of the node */
1055 ipsec_delaux(m);
1056 #endif
1057 if (error == 0) {
1058 /* Record statistics for this interface address. */
1059 if (ia != NULL) {
1060 ia->ia_ifa.if_opackets++;
1061 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1062 }
1063
1064 error = (*ifp->if_output)(ifp, m,
1065 (struct sockaddr *)dst, ro->ro_rt);
1066 } else
1067 m_freem(m);
1068 }
1069
1070 if (error == 0)
1071 ipstat.ips_fragmented++;
1072
1073 done:
1074 if (ro == &iproute && ro->ro_rt) {
1075 RTFREE(ro->ro_rt);
1076 ro->ro_rt = NULL;
1077 }
1078 #ifdef IPSEC
1079 if (sp != NULL) {
1080 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
1081 printf("DP ip_output call free SP:%p\n", sp));
1082 key_freesp(sp);
1083 }
1084 #endif
1085 #ifdef FAST_IPSEC
1086 if (sp != NULL)
1087 KEY_FREESP(&sp);
1088 #endif
1089 return (error);
1090 bad:
1091 m_freem(m);
1092 goto done;
1093 }
1094
1095 /*
1096 * Create a chain of fragments which fit the given mtu. m_frag points to the
1097 * mbuf to be fragmented; on return it points to the chain with the fragments.
1098 * Return 0 if no error. If error, m_frag may contain a partially built
1099 * chain of fragments that should be freed by the caller.
1100 *
1101 * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist)
1102 * sw_csum contains the delayed checksums flags (e.g., CSUM_DELAY_IP).
1103 */
1104 int
1105 ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
1106 u_long if_hwassist_flags, int sw_csum)
1107 {
1108 int error = 0;
1109 int hlen = ip->ip_hl << 2;
1110 int len = (mtu - hlen) & ~7; /* size of payload in each fragment */
1111 int off;
1112 struct mbuf *m0 = *m_frag; /* the original packet */
1113 int firstlen;
1114 struct mbuf **mnext;
1115 int nfrags;
1116
1117 if (ip->ip_off & IP_DF) { /* Fragmentation not allowed */
1118 ipstat.ips_cantfrag++;
1119 return EMSGSIZE;
1120 }
1121
1122 /*
1123 * Must be able to put at least 8 bytes per fragment.
1124 */
1125 if (len < 8)
1126 return EMSGSIZE;
1127
1128 /*
1129 * If the interface will not calculate checksums on
1130 * fragmented packets, then do it here.
1131 */
1132 if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
1133 (if_hwassist_flags & CSUM_IP_FRAGS) == 0) {
1134 in_delayed_cksum(m0);
1135 m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1136 }
1137
1138 if (len > PAGE_SIZE) {
1139 /*
1140 * Fragment large datagrams such that each segment
1141 * contains a multiple of PAGE_SIZE amount of data,
1142 * plus headers. This enables a receiver to perform
1143 * page-flipping zero-copy optimizations.
1144 *
1145 * XXX When does this help given that sender and receiver
1146 * could have different page sizes, and also mtu could
1147 * be less than the receiver's page size ?
1148 */
1149 int newlen;
1150 struct mbuf *m;
1151
1152 for (m = m0, off = 0; m && (off+m->m_len) <= mtu; m = m->m_next)
1153 off += m->m_len;
1154
1155 /*
1156 * firstlen (off - hlen) must be aligned on an
1157 * 8-byte boundary
1158 */
1159 if (off < hlen)
1160 goto smart_frag_failure;
1161 off = ((off - hlen) & ~7) + hlen;
1162 newlen = (~PAGE_MASK) & mtu;
1163 if ((newlen + sizeof (struct ip)) > mtu) {
1164 /* we failed, go back the default */
1165 smart_frag_failure:
1166 newlen = len;
1167 off = hlen + len;
1168 }
1169 len = newlen;
1170
1171 } else {
1172 off = hlen + len;
1173 }
1174
1175 firstlen = off - hlen;
1176 mnext = &m0->m_nextpkt; /* pointer to next packet */
1177
1178 /*
1179 * Loop through length of segment after first fragment,
1180 * make new header and copy data of each part and link onto chain.
1181 * Here, m0 is the original packet, m is the fragment being created.
1182 * The fragments are linked off the m_nextpkt of the original
1183 * packet, which after processing serves as the first fragment.
1184 */
1185 for (nfrags = 1; off < ip->ip_len; off += len, nfrags++) {
1186 struct ip *mhip; /* ip header on the fragment */
1187 struct mbuf *m;
1188 int mhlen = sizeof (struct ip);
1189
1190 MGETHDR(m, M_DONTWAIT, MT_HEADER);
1191 if (m == 0) {
1192 error = ENOBUFS;
1193 ipstat.ips_odropped++;
1194 goto done;
1195 }
1196 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
1197 /*
1198 * In the first mbuf, leave room for the link header, then
1199 * copy the original IP header including options. The payload
1200 * goes into an additional mbuf chain returned by m_copy().
1201 */
1202 m->m_data += max_linkhdr;
1203 mhip = mtod(m, struct ip *);
1204 *mhip = *ip;
1205 if (hlen > sizeof (struct ip)) {
1206 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
1207 mhip->ip_v = IPVERSION;
1208 mhip->ip_hl = mhlen >> 2;
1209 }
1210 m->m_len = mhlen;
1211 /* XXX do we need to add ip->ip_off below ? */
1212 mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off;
1213 if (off + len >= ip->ip_len) { /* last fragment */
1214 len = ip->ip_len - off;
1215 m->m_flags |= M_LASTFRAG;
1216 } else
1217 mhip->ip_off |= IP_MF;
1218 mhip->ip_len = htons((u_short)(len + mhlen));
1219 m->m_next = m_copy(m0, off, len);
1220 if (m->m_next == 0) { /* copy failed */
1221 m_free(m);
1222 error = ENOBUFS; /* ??? */
1223 ipstat.ips_odropped++;
1224 goto done;
1225 }
1226 m->m_pkthdr.len = mhlen + len;
1227 m->m_pkthdr.rcvif = (struct ifnet *)0;
1228 #ifdef MAC
1229 mac_create_fragment(m0, m);
1230 #endif
1231 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
1232 mhip->ip_off = htons(mhip->ip_off);
1233 mhip->ip_sum = 0;
1234 if (sw_csum & CSUM_DELAY_IP)
1235 mhip->ip_sum = in_cksum(m, mhlen);
1236 *mnext = m;
1237 mnext = &m->m_nextpkt;
1238 }
1239 ipstat.ips_ofragments += nfrags;
1240
1241 /* set first marker for fragment chain */
1242 m0->m_flags |= M_FIRSTFRAG | M_FRAG;
1243 m0->m_pkthdr.csum_data = nfrags;
1244
1245 /*
1246 * Update first fragment by trimming what's been copied out
1247 * and updating header.
1248 */
1249 m_adj(m0, hlen + firstlen - ip->ip_len);
1250 m0->m_pkthdr.len = hlen + firstlen;
1251 ip->ip_len = htons((u_short)m0->m_pkthdr.len);
1252 ip->ip_off |= IP_MF;
1253 ip->ip_off = htons(ip->ip_off);
1254 ip->ip_sum = 0;
1255 if (sw_csum & CSUM_DELAY_IP)
1256 ip->ip_sum = in_cksum(m0, hlen);
1257
1258 done:
1259 *m_frag = m0;
1260 return error;
1261 }
1262
1263 void
1264 in_delayed_cksum(struct mbuf *m)
1265 {
1266 struct ip *ip;
1267 u_short csum, offset;
1268
1269 ip = mtod(m, struct ip *);
1270 offset = ip->ip_hl << 2 ;
1271 csum = in_cksum_skip(m, ip->ip_len, offset);
1272 if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
1273 csum = 0xffff;
1274 offset += m->m_pkthdr.csum_data; /* checksum offset */
1275
1276 if (offset + sizeof(u_short) > m->m_len) {
1277 printf("delayed m_pullup, m->len: %d off: %d p: %d\n",
1278 m->m_len, offset, ip->ip_p);
1279 /*
1280 * XXX
1281 * this shouldn't happen, but if it does, the
1282 * correct behavior may be to insert the checksum
1283 * in the existing chain instead of rearranging it.
1284 */
1285 m = m_pullup(m, offset + sizeof(u_short));
1286 }
1287 *(u_short *)(m->m_data + offset) = csum;
1288 }
1289
1290 /*
1291 * Insert IP options into preformed packet.
1292 * Adjust IP destination as required for IP source routing,
1293 * as indicated by a non-zero in_addr at the start of the options.
1294 *
1295 * XXX This routine assumes that the packet has no options in place.
1296 */
1297 static struct mbuf *
1298 ip_insertoptions(m, opt, phlen)
1299 register struct mbuf *m;
1300 struct mbuf *opt;
1301 int *phlen;
1302 {
1303 register struct ipoption *p = mtod(opt, struct ipoption *);
1304 struct mbuf *n;
1305 register struct ip *ip = mtod(m, struct ip *);
1306 unsigned optlen;
1307
1308 optlen = opt->m_len - sizeof(p->ipopt_dst);
1309 if (optlen + ip->ip_len > IP_MAXPACKET) {
1310 *phlen = 0;
1311 return (m); /* XXX should fail */
1312 }
1313 if (p->ipopt_dst.s_addr)
1314 ip->ip_dst = p->ipopt_dst;
1315 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
1316 MGETHDR(n, M_DONTWAIT, MT_HEADER);
1317 if (n == 0) {
1318 *phlen = 0;
1319 return (m);
1320 }
1321 n->m_pkthdr.rcvif = (struct ifnet *)0;
1322 #ifdef MAC
1323 mac_create_mbuf_from_mbuf(m, n);
1324 #endif
1325 n->m_pkthdr.len = m->m_pkthdr.len + optlen;
1326 m->m_len -= sizeof(struct ip);
1327 m->m_data += sizeof(struct ip);
1328 n->m_next = m;
1329 m = n;
1330 m->m_len = optlen + sizeof(struct ip);
1331 m->m_data += max_linkhdr;
1332 bcopy(ip, mtod(m, void *), sizeof(struct ip));
1333 } else {
1334 m->m_data -= optlen;
1335 m->m_len += optlen;
1336 m->m_pkthdr.len += optlen;
1337 bcopy(ip, mtod(m, void *), sizeof(struct ip));
1338 }
1339 ip = mtod(m, struct ip *);
1340 bcopy(p->ipopt_list, ip + 1, optlen);
1341 *phlen = sizeof(struct ip) + optlen;
1342 ip->ip_v = IPVERSION;
1343 ip->ip_hl = *phlen >> 2;
1344 ip->ip_len += optlen;
1345 return (m);
1346 }
1347
1348 /*
1349 * Copy options from ip to jp,
1350 * omitting those not copied during fragmentation.
1351 */
1352 int
1353 ip_optcopy(ip, jp)
1354 struct ip *ip, *jp;
1355 {
1356 register u_char *cp, *dp;
1357 int opt, optlen, cnt;
1358
1359 cp = (u_char *)(ip + 1);
1360 dp = (u_char *)(jp + 1);
1361 cnt = (ip->ip_hl << 2) - sizeof (struct ip);
1362 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1363 opt = cp[0];
1364 if (opt == IPOPT_EOL)
1365 break;
1366 if (opt == IPOPT_NOP) {
1367 /* Preserve for IP mcast tunnel's LSRR alignment. */
1368 *dp++ = IPOPT_NOP;
1369 optlen = 1;
1370 continue;
1371 }
1372
1373 KASSERT(cnt >= IPOPT_OLEN + sizeof(*cp),
1374 ("ip_optcopy: malformed ipv4 option"));
1375 optlen = cp[IPOPT_OLEN];
1376 KASSERT(optlen >= IPOPT_OLEN + sizeof(*cp) && optlen <= cnt,
1377 ("ip_optcopy: malformed ipv4 option"));
1378
1379 /* bogus lengths should have been caught by ip_dooptions */
1380 if (optlen > cnt)
1381 optlen = cnt;
1382 if (IPOPT_COPIED(opt)) {
1383 bcopy(cp, dp, optlen);
1384 dp += optlen;
1385 }
1386 }
1387 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
1388 *dp++ = IPOPT_EOL;
1389 return (optlen);
1390 }
1391
1392 /*
1393 * IP socket option processing.
1394 */
1395 int
1396 ip_ctloutput(so, sopt)
1397 struct socket *so;
1398 struct sockopt *sopt;
1399 {
1400 struct inpcb *inp = sotoinpcb(so);
1401 int error, optval;
1402
1403 error = optval = 0;
1404 if (sopt->sopt_level != IPPROTO_IP) {
1405 return (EINVAL);
1406 }
1407
1408 switch (sopt->sopt_dir) {
1409 case SOPT_SET:
1410 switch (sopt->sopt_name) {
1411 case IP_OPTIONS:
1412 #ifdef notyet
1413 case IP_RETOPTS:
1414 #endif
1415 {
1416 struct mbuf *m;
1417 if (sopt->sopt_valsize > MLEN) {
1418 error = EMSGSIZE;
1419 break;
1420 }
1421 MGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT, MT_HEADER);
1422 if (m == 0) {
1423 error = ENOBUFS;
1424 break;
1425 }
1426 m->m_len = sopt->sopt_valsize;
1427 error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
1428 m->m_len);
1429
1430 return (ip_pcbopts(sopt->sopt_name, &inp->inp_options,
1431 m));
1432 }
1433
1434 case IP_TOS:
1435 case IP_TTL:
1436 case IP_RECVOPTS:
1437 case IP_RECVRETOPTS:
1438 case IP_RECVDSTADDR:
1439 case IP_RECVTTL:
1440 case IP_RECVIF:
1441 case IP_FAITH:
1442 case IP_ONESBCAST:
1443 error = sooptcopyin(sopt, &optval, sizeof optval,
1444 sizeof optval);
1445 if (error)
1446 break;
1447
1448 switch (sopt->sopt_name) {
1449 case IP_TOS:
1450 inp->inp_ip_tos = optval;
1451 break;
1452
1453 case IP_TTL:
1454 inp->inp_ip_ttl = optval;
1455 break;
1456 #define OPTSET(bit) \
1457 if (optval) \
1458 inp->inp_flags |= bit; \
1459 else \
1460 inp->inp_flags &= ~bit;
1461
1462 case IP_RECVOPTS:
1463 OPTSET(INP_RECVOPTS);
1464 break;
1465
1466 case IP_RECVRETOPTS:
1467 OPTSET(INP_RECVRETOPTS);
1468 break;
1469
1470 case IP_RECVDSTADDR:
1471 OPTSET(INP_RECVDSTADDR);
1472 break;
1473
1474 case IP_RECVTTL:
1475 OPTSET(INP_RECVTTL);
1476 break;
1477
1478 case IP_RECVIF:
1479 OPTSET(INP_RECVIF);
1480 break;
1481
1482 case IP_FAITH:
1483 OPTSET(INP_FAITH);
1484 break;
1485
1486 case IP_ONESBCAST:
1487 OPTSET(INP_ONESBCAST);
1488 break;
1489 }
1490 break;
1491 #undef OPTSET
1492
1493 case IP_MULTICAST_IF:
1494 case IP_MULTICAST_VIF:
1495 case IP_MULTICAST_TTL:
1496 case IP_MULTICAST_LOOP:
1497 case IP_ADD_MEMBERSHIP:
1498 case IP_DROP_MEMBERSHIP:
1499 error = ip_setmoptions(sopt, &inp->inp_moptions);
1500 break;
1501
1502 case IP_PORTRANGE:
1503 error = sooptcopyin(sopt, &optval, sizeof optval,
1504 sizeof optval);
1505 if (error)
1506 break;
1507
1508 switch (optval) {
1509 case IP_PORTRANGE_DEFAULT:
1510 inp->inp_flags &= ~(INP_LOWPORT);
1511 inp->inp_flags &= ~(INP_HIGHPORT);
1512 break;
1513
1514 case IP_PORTRANGE_HIGH:
1515 inp->inp_flags &= ~(INP_LOWPORT);
1516 inp->inp_flags |= INP_HIGHPORT;
1517 break;
1518
1519 case IP_PORTRANGE_LOW:
1520 inp->inp_flags &= ~(INP_HIGHPORT);
1521 inp->inp_flags |= INP_LOWPORT;
1522 break;
1523
1524 default:
1525 error = EINVAL;
1526 break;
1527 }
1528 break;
1529
1530 #if defined(IPSEC) || defined(FAST_IPSEC)
1531 case IP_IPSEC_POLICY:
1532 {
1533 caddr_t req;
1534 size_t len = 0;
1535 int priv;
1536 struct mbuf *m;
1537 int optname;
1538
1539 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1540 break;
1541 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1542 break;
1543 priv = (sopt->sopt_td != NULL &&
1544 suser(sopt->sopt_td) != 0) ? 0 : 1;
1545 req = mtod(m, caddr_t);
1546 len = m->m_len;
1547 optname = sopt->sopt_name;
1548 error = ipsec4_set_policy(inp, optname, req, len, priv);
1549 m_freem(m);
1550 break;
1551 }
1552 #endif /*IPSEC*/
1553
1554 default:
1555 error = ENOPROTOOPT;
1556 break;
1557 }
1558 break;
1559
1560 case SOPT_GET:
1561 switch (sopt->sopt_name) {
1562 case IP_OPTIONS:
1563 case IP_RETOPTS:
1564 if (inp->inp_options)
1565 error = sooptcopyout(sopt,
1566 mtod(inp->inp_options,
1567 char *),
1568 inp->inp_options->m_len);
1569 else
1570 sopt->sopt_valsize = 0;
1571 break;
1572
1573 case IP_TOS:
1574 case IP_TTL:
1575 case IP_RECVOPTS:
1576 case IP_RECVRETOPTS:
1577 case IP_RECVDSTADDR:
1578 case IP_RECVTTL:
1579 case IP_RECVIF:
1580 case IP_PORTRANGE:
1581 case IP_FAITH:
1582 case IP_ONESBCAST:
1583 switch (sopt->sopt_name) {
1584
1585 case IP_TOS:
1586 optval = inp->inp_ip_tos;
1587 break;
1588
1589 case IP_TTL:
1590 optval = inp->inp_ip_ttl;
1591 break;
1592
1593 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
1594
1595 case IP_RECVOPTS:
1596 optval = OPTBIT(INP_RECVOPTS);
1597 break;
1598
1599 case IP_RECVRETOPTS:
1600 optval = OPTBIT(INP_RECVRETOPTS);
1601 break;
1602
1603 case IP_RECVDSTADDR:
1604 optval = OPTBIT(INP_RECVDSTADDR);
1605 break;
1606
1607 case IP_RECVTTL:
1608 optval = OPTBIT(INP_RECVTTL);
1609 break;
1610
1611 case IP_RECVIF:
1612 optval = OPTBIT(INP_RECVIF);
1613 break;
1614
1615 case IP_PORTRANGE:
1616 if (inp->inp_flags & INP_HIGHPORT)
1617 optval = IP_PORTRANGE_HIGH;
1618 else if (inp->inp_flags & INP_LOWPORT)
1619 optval = IP_PORTRANGE_LOW;
1620 else
1621 optval = 0;
1622 break;
1623
1624 case IP_FAITH:
1625 optval = OPTBIT(INP_FAITH);
1626 break;
1627
1628 case IP_ONESBCAST:
1629 optval = OPTBIT(INP_ONESBCAST);
1630 break;
1631 }
1632 error = sooptcopyout(sopt, &optval, sizeof optval);
1633 break;
1634
1635 case IP_MULTICAST_IF:
1636 case IP_MULTICAST_VIF:
1637 case IP_MULTICAST_TTL:
1638 case IP_MULTICAST_LOOP:
1639 case IP_ADD_MEMBERSHIP:
1640 case IP_DROP_MEMBERSHIP:
1641 error = ip_getmoptions(sopt, inp->inp_moptions);
1642 break;
1643
1644 #if defined(IPSEC) || defined(FAST_IPSEC)
1645 case IP_IPSEC_POLICY:
1646 {
1647 struct mbuf *m = NULL;
1648 caddr_t req = NULL;
1649 size_t len = 0;
1650
1651 if (m != 0) {
1652 req = mtod(m, caddr_t);
1653 len = m->m_len;
1654 }
1655 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m);
1656 if (error == 0)
1657 error = soopt_mcopyout(sopt, m); /* XXX */
1658 if (error == 0)
1659 m_freem(m);
1660 break;
1661 }
1662 #endif /*IPSEC*/
1663
1664 default:
1665 error = ENOPROTOOPT;
1666 break;
1667 }
1668 break;
1669 }
1670 return (error);
1671 }
1672
1673 /*
1674 * Set up IP options in pcb for insertion in output packets.
1675 * Store in mbuf with pointer in pcbopt, adding pseudo-option
1676 * with destination address if source routed.
1677 */
1678 static int
1679 ip_pcbopts(optname, pcbopt, m)
1680 int optname;
1681 struct mbuf **pcbopt;
1682 register struct mbuf *m;
1683 {
1684 register int cnt, optlen;
1685 register u_char *cp;
1686 u_char opt;
1687
1688 /* turn off any old options */
1689 if (*pcbopt)
1690 (void)m_free(*pcbopt);
1691 *pcbopt = 0;
1692 if (m == (struct mbuf *)0 || m->m_len == 0) {
1693 /*
1694 * Only turning off any previous options.
1695 */
1696 if (m)
1697 (void)m_free(m);
1698 return (0);
1699 }
1700
1701 if (m->m_len % sizeof(int32_t))
1702 goto bad;
1703 /*
1704 * IP first-hop destination address will be stored before
1705 * actual options; move other options back
1706 * and clear it when none present.
1707 */
1708 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
1709 goto bad;
1710 cnt = m->m_len;
1711 m->m_len += sizeof(struct in_addr);
1712 cp = mtod(m, u_char *) + sizeof(struct in_addr);
1713 bcopy(mtod(m, void *), cp, (unsigned)cnt);
1714 bzero(mtod(m, void *), sizeof(struct in_addr));
1715
1716 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1717 opt = cp[IPOPT_OPTVAL];
1718 if (opt == IPOPT_EOL)
1719 break;
1720 if (opt == IPOPT_NOP)
1721 optlen = 1;
1722 else {
1723 if (cnt < IPOPT_OLEN + sizeof(*cp))
1724 goto bad;
1725 optlen = cp[IPOPT_OLEN];
1726 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
1727 goto bad;
1728 }
1729 switch (opt) {
1730
1731 default:
1732 break;
1733
1734 case IPOPT_LSRR:
1735 case IPOPT_SSRR:
1736 /*
1737 * user process specifies route as:
1738 * ->A->B->C->D
1739 * D must be our final destination (but we can't
1740 * check that since we may not have connected yet).
1741 * A is first hop destination, which doesn't appear in
1742 * actual IP option, but is stored before the options.
1743 */
1744 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
1745 goto bad;
1746 m->m_len -= sizeof(struct in_addr);
1747 cnt -= sizeof(struct in_addr);
1748 optlen -= sizeof(struct in_addr);
1749 cp[IPOPT_OLEN] = optlen;
1750 /*
1751 * Move first hop before start of options.
1752 */
1753 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
1754 sizeof(struct in_addr));
1755 /*
1756 * Then copy rest of options back
1757 * to close up the deleted entry.
1758 */
1759 bcopy((&cp[IPOPT_OFFSET+1] + sizeof(struct in_addr)),
1760 &cp[IPOPT_OFFSET+1],
1761 (unsigned)cnt + sizeof(struct in_addr));
1762 break;
1763 }
1764 }
1765 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
1766 goto bad;
1767 *pcbopt = m;
1768 return (0);
1769
1770 bad:
1771 (void)m_free(m);
1772 return (EINVAL);
1773 }
1774
1775 /*
1776 * XXX
1777 * The whole multicast option thing needs to be re-thought.
1778 * Several of these options are equally applicable to non-multicast
1779 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
1780 * standard option (IP_TTL).
1781 */
1782
1783 /*
1784 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
1785 */
1786 static struct ifnet *
1787 ip_multicast_if(a, ifindexp)
1788 struct in_addr *a;
1789 int *ifindexp;
1790 {
1791 int ifindex;
1792 struct ifnet *ifp;
1793
1794 if (ifindexp)
1795 *ifindexp = 0;
1796 if (ntohl(a->s_addr) >> 24 == 0) {
1797 ifindex = ntohl(a->s_addr) & 0xffffff;
1798 if (ifindex < 0 || if_index < ifindex)
1799 return NULL;
1800 ifp = ifnet_byindex(ifindex);
1801 if (ifindexp)
1802 *ifindexp = ifindex;
1803 } else {
1804 INADDR_TO_IFP(*a, ifp);
1805 }
1806 return ifp;
1807 }
1808
1809 /*
1810 * Set the IP multicast options in response to user setsockopt().
1811 */
1812 static int
1813 ip_setmoptions(sopt, imop)
1814 struct sockopt *sopt;
1815 struct ip_moptions **imop;
1816 {
1817 int error = 0;
1818 int i;
1819 struct in_addr addr;
1820 struct ip_mreq mreq;
1821 struct ifnet *ifp;
1822 struct ip_moptions *imo = *imop;
1823 struct route ro;
1824 struct sockaddr_in *dst;
1825 int ifindex;
1826 int s;
1827
1828 if (imo == NULL) {
1829 /*
1830 * No multicast option buffer attached to the pcb;
1831 * allocate one and initialize to default values.
1832 */
1833 imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS,
1834 M_WAITOK);
1835
1836 if (imo == NULL)
1837 return (ENOBUFS);
1838 *imop = imo;
1839 imo->imo_multicast_ifp = NULL;
1840 imo->imo_multicast_addr.s_addr = INADDR_ANY;
1841 imo->imo_multicast_vif = -1;
1842 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1843 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1844 imo->imo_num_memberships = 0;
1845 }
1846
1847 switch (sopt->sopt_name) {
1848 /* store an index number for the vif you wanna use in the send */
1849 case IP_MULTICAST_VIF:
1850 if (legal_vif_num == 0) {
1851 error = EOPNOTSUPP;
1852 break;
1853 }
1854 error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
1855 if (error)
1856 break;
1857 if (!legal_vif_num(i) && (i != -1)) {
1858 error = EINVAL;
1859 break;
1860 }
1861 imo->imo_multicast_vif = i;
1862 break;
1863
1864 case IP_MULTICAST_IF:
1865 /*
1866 * Select the interface for outgoing multicast packets.
1867 */
1868 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
1869 if (error)
1870 break;
1871 /*
1872 * INADDR_ANY is used to remove a previous selection.
1873 * When no interface is selected, a default one is
1874 * chosen every time a multicast packet is sent.
1875 */
1876 if (addr.s_addr == INADDR_ANY) {
1877 imo->imo_multicast_ifp = NULL;
1878 break;
1879 }
1880 /*
1881 * The selected interface is identified by its local
1882 * IP address. Find the interface and confirm that
1883 * it supports multicasting.
1884 */
1885 s = splimp();
1886 ifp = ip_multicast_if(&addr, &ifindex);
1887 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1888 splx(s);
1889 error = EADDRNOTAVAIL;
1890 break;
1891 }
1892 imo->imo_multicast_ifp = ifp;
1893 if (ifindex)
1894 imo->imo_multicast_addr = addr;
1895 else
1896 imo->imo_multicast_addr.s_addr = INADDR_ANY;
1897 splx(s);
1898 break;
1899
1900 case IP_MULTICAST_TTL:
1901 /*
1902 * Set the IP time-to-live for outgoing multicast packets.
1903 * The original multicast API required a char argument,
1904 * which is inconsistent with the rest of the socket API.
1905 * We allow either a char or an int.
1906 */
1907 if (sopt->sopt_valsize == 1) {
1908 u_char ttl;
1909 error = sooptcopyin(sopt, &ttl, 1, 1);
1910 if (error)
1911 break;
1912 imo->imo_multicast_ttl = ttl;
1913 } else {
1914 u_int ttl;
1915 error = sooptcopyin(sopt, &ttl, sizeof ttl,
1916 sizeof ttl);
1917 if (error)
1918 break;
1919 if (ttl > 255)
1920 error = EINVAL;
1921 else
1922 imo->imo_multicast_ttl = ttl;
1923 }
1924 break;
1925
1926 case IP_MULTICAST_LOOP:
1927 /*
1928 * Set the loopback flag for outgoing multicast packets.
1929 * Must be zero or one. The original multicast API required a
1930 * char argument, which is inconsistent with the rest
1931 * of the socket API. We allow either a char or an int.
1932 */
1933 if (sopt->sopt_valsize == 1) {
1934 u_char loop;
1935 error = sooptcopyin(sopt, &loop, 1, 1);
1936 if (error)
1937 break;
1938 imo->imo_multicast_loop = !!loop;
1939 } else {
1940 u_int loop;
1941 error = sooptcopyin(sopt, &loop, sizeof loop,
1942 sizeof loop);
1943 if (error)
1944 break;
1945 imo->imo_multicast_loop = !!loop;
1946 }
1947 break;
1948
1949 case IP_ADD_MEMBERSHIP:
1950 /*
1951 * Add a multicast group membership.
1952 * Group must be a valid IP multicast address.
1953 */
1954 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1955 if (error)
1956 break;
1957
1958 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1959 error = EINVAL;
1960 break;
1961 }
1962 s = splimp();
1963 /*
1964 * If no interface address was provided, use the interface of
1965 * the route to the given multicast address.
1966 */
1967 if (mreq.imr_interface.s_addr == INADDR_ANY) {
1968 bzero((caddr_t)&ro, sizeof(ro));
1969 dst = (struct sockaddr_in *)&ro.ro_dst;
1970 dst->sin_len = sizeof(*dst);
1971 dst->sin_family = AF_INET;
1972 dst->sin_addr = mreq.imr_multiaddr;
1973 rtalloc_ign(&ro, RTF_CLONING);
1974 if (ro.ro_rt == NULL) {
1975 error = EADDRNOTAVAIL;
1976 splx(s);
1977 break;
1978 }
1979 ifp = ro.ro_rt->rt_ifp;
1980 RTFREE(ro.ro_rt);
1981 }
1982 else {
1983 ifp = ip_multicast_if(&mreq.imr_interface, NULL);
1984 }
1985
1986 /*
1987 * See if we found an interface, and confirm that it
1988 * supports multicast.
1989 */
1990 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1991 error = EADDRNOTAVAIL;
1992 splx(s);
1993 break;
1994 }
1995 /*
1996 * See if the membership already exists or if all the
1997 * membership slots are full.
1998 */
1999 for (i = 0; i < imo->imo_num_memberships; ++i) {
2000 if (imo->imo_membership[i]->inm_ifp == ifp &&
2001 imo->imo_membership[i]->inm_addr.s_addr
2002 == mreq.imr_multiaddr.s_addr)
2003 break;
2004 }
2005 if (i < imo->imo_num_memberships) {
2006 error = EADDRINUSE;
2007 splx(s);
2008 break;
2009 }
2010 if (i == IP_MAX_MEMBERSHIPS) {
2011 error = ETOOMANYREFS;
2012 splx(s);
2013 break;
2014 }
2015 /*
2016 * Everything looks good; add a new record to the multicast
2017 * address list for the given interface.
2018 */
2019 if ((imo->imo_membership[i] =
2020 in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) {
2021 error = ENOBUFS;
2022 splx(s);
2023 break;
2024 }
2025 ++imo->imo_num_memberships;
2026 splx(s);
2027 break;
2028
2029 case IP_DROP_MEMBERSHIP:
2030 /*
2031 * Drop a multicast group membership.
2032 * Group must be a valid IP multicast address.
2033 */
2034 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
2035 if (error)
2036 break;
2037
2038 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
2039 error = EINVAL;
2040 break;
2041 }
2042
2043 s = splimp();
2044 /*
2045 * If an interface address was specified, get a pointer
2046 * to its ifnet structure.
2047 */
2048 if (mreq.imr_interface.s_addr == INADDR_ANY)
2049 ifp = NULL;
2050 else {
2051 ifp = ip_multicast_if(&mreq.imr_interface, NULL);
2052 if (ifp == NULL) {
2053 error = EADDRNOTAVAIL;
2054 splx(s);
2055 break;
2056 }
2057 }
2058 /*
2059 * Find the membership in the membership array.
2060 */
2061 for (i = 0; i < imo->imo_num_memberships; ++i) {
2062 if ((ifp == NULL ||
2063 imo->imo_membership[i]->inm_ifp == ifp) &&
2064 imo->imo_membership[i]->inm_addr.s_addr ==
2065 mreq.imr_multiaddr.s_addr)
2066 break;
2067 }
2068 if (i == imo->imo_num_memberships) {
2069 error = EADDRNOTAVAIL;
2070 splx(s);
2071 break;
2072 }
2073 /*
2074 * Give up the multicast address record to which the
2075 * membership points.
2076 */
2077 in_delmulti(imo->imo_membership[i]);
2078 /*
2079 * Remove the gap in the membership array.
2080 */
2081 for (++i; i < imo->imo_num_memberships; ++i)
2082 imo->imo_membership[i-1] = imo->imo_membership[i];
2083 --imo->imo_num_memberships;
2084 splx(s);
2085 break;
2086
2087 default:
2088 error = EOPNOTSUPP;
2089 break;
2090 }
2091
2092 /*
2093 * If all options have default values, no need to keep the mbuf.
2094 */
2095 if (imo->imo_multicast_ifp == NULL &&
2096 imo->imo_multicast_vif == -1 &&
2097 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
2098 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
2099 imo->imo_num_memberships == 0) {
2100 free(*imop, M_IPMOPTS);
2101 *imop = NULL;
2102 }
2103
2104 return (error);
2105 }
2106
2107 /*
2108 * Return the IP multicast options in response to user getsockopt().
2109 */
2110 static int
2111 ip_getmoptions(sopt, imo)
2112 struct sockopt *sopt;
2113 register struct ip_moptions *imo;
2114 {
2115 struct in_addr addr;
2116 struct in_ifaddr *ia;
2117 int error, optval;
2118 u_char coptval;
2119
2120 error = 0;
2121 switch (sopt->sopt_name) {
2122 case IP_MULTICAST_VIF:
2123 if (imo != NULL)
2124 optval = imo->imo_multicast_vif;
2125 else
2126 optval = -1;
2127 error = sooptcopyout(sopt, &optval, sizeof optval);
2128 break;
2129
2130 case IP_MULTICAST_IF:
2131 if (imo == NULL || imo->imo_multicast_ifp == NULL)
2132 addr.s_addr = INADDR_ANY;
2133 else if (imo->imo_multicast_addr.s_addr) {
2134 /* return the value user has set */
2135 addr = imo->imo_multicast_addr;
2136 } else {
2137 IFP_TO_IA(imo->imo_multicast_ifp, ia);
2138 addr.s_addr = (ia == NULL) ? INADDR_ANY
2139 : IA_SIN(ia)->sin_addr.s_addr;
2140 }
2141 error = sooptcopyout(sopt, &addr, sizeof addr);
2142 break;
2143
2144 case IP_MULTICAST_TTL:
2145 if (imo == 0)
2146 optval = coptval = IP_DEFAULT_MULTICAST_TTL;
2147 else
2148 optval = coptval = imo->imo_multicast_ttl;
2149 if (sopt->sopt_valsize == 1)
2150 error = sooptcopyout(sopt, &coptval, 1);
2151 else
2152 error = sooptcopyout(sopt, &optval, sizeof optval);
2153 break;
2154
2155 case IP_MULTICAST_LOOP:
2156 if (imo == 0)
2157 optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
2158 else
2159 optval = coptval = imo->imo_multicast_loop;
2160 if (sopt->sopt_valsize == 1)
2161 error = sooptcopyout(sopt, &coptval, 1);
2162 else
2163 error = sooptcopyout(sopt, &optval, sizeof optval);
2164 break;
2165
2166 default:
2167 error = ENOPROTOOPT;
2168 break;
2169 }
2170 return (error);
2171 }
2172
2173 /*
2174 * Discard the IP multicast options.
2175 */
2176 void
2177 ip_freemoptions(imo)
2178 register struct ip_moptions *imo;
2179 {
2180 register int i;
2181
2182 if (imo != NULL) {
2183 for (i = 0; i < imo->imo_num_memberships; ++i)
2184 in_delmulti(imo->imo_membership[i]);
2185 free(imo, M_IPMOPTS);
2186 }
2187 }
2188
2189 /*
2190 * Routine called from ip_output() to loop back a copy of an IP multicast
2191 * packet to the input queue of a specified interface. Note that this
2192 * calls the output routine of the loopback "driver", but with an interface
2193 * pointer that might NOT be a loopback interface -- evil, but easier than
2194 * replicating that code here.
2195 */
2196 static void
2197 ip_mloopback(ifp, m, dst, hlen)
2198 struct ifnet *ifp;
2199 register struct mbuf *m;
2200 register struct sockaddr_in *dst;
2201 int hlen;
2202 {
2203 register struct ip *ip;
2204 struct mbuf *copym;
2205
2206 copym = m_copy(m, 0, M_COPYALL);
2207 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
2208 copym = m_pullup(copym, hlen);
2209 if (copym != NULL) {
2210 /*
2211 * We don't bother to fragment if the IP length is greater
2212 * than the interface's MTU. Can this possibly matter?
2213 */
2214 ip = mtod(copym, struct ip *);
2215 ip->ip_len = htons(ip->ip_len);
2216 ip->ip_off = htons(ip->ip_off);
2217 ip->ip_sum = 0;
2218 ip->ip_sum = in_cksum(copym, hlen);
2219 /*
2220 * NB:
2221 * It's not clear whether there are any lingering
2222 * reentrancy problems in other areas which might
2223 * be exposed by using ip_input directly (in
2224 * particular, everything which modifies the packet
2225 * in-place). Yet another option is using the
2226 * protosw directly to deliver the looped back
2227 * packet. For the moment, we'll err on the side
2228 * of safety by using if_simloop().
2229 */
2230 #if 1 /* XXX */
2231 if (dst->sin_family != AF_INET) {
2232 printf("ip_mloopback: bad address family %d\n",
2233 dst->sin_family);
2234 dst->sin_family = AF_INET;
2235 }
2236 #endif
2237
2238 #ifdef notdef
2239 copym->m_pkthdr.rcvif = ifp;
2240 ip_input(copym);
2241 #else
2242 /* if the checksum hasn't been computed, mark it as valid */
2243 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
2244 copym->m_pkthdr.csum_flags |=
2245 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
2246 copym->m_pkthdr.csum_data = 0xffff;
2247 }
2248 if_simloop(ifp, copym, dst->sin_family, 0);
2249 #endif
2250 }
2251 }
Cache object: 9ce0c4a0c6c6ea3400e7746d0cfa8873
|