1 /*-
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
30 * $FreeBSD: releng/6.4/sys/netinet/ip_output.c 167338 2007-03-08 13:19:03Z bms $
31 */
32
33 #include "opt_ipfw.h"
34 #include "opt_ipsec.h"
35 #include "opt_mac.h"
36 #include "opt_mbuf_stress_test.h"
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/kernel.h>
41 #include <sys/mac.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/protosw.h>
45 #include <sys/socket.h>
46 #include <sys/socketvar.h>
47 #include <sys/sysctl.h>
48
49 #include <net/if.h>
50 #include <net/netisr.h>
51 #include <net/pfil.h>
52 #include <net/route.h>
53
54 #include <netinet/in.h>
55 #include <netinet/in_systm.h>
56 #include <netinet/ip.h>
57 #include <netinet/in_pcb.h>
58 #include <netinet/in_var.h>
59 #include <netinet/ip_var.h>
60
61 #include <machine/in_cksum.h>
62
63 static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options");
64
65 #ifdef IPSEC
66 #include <netinet6/ipsec.h>
67 #include <netkey/key.h>
68 #ifdef IPSEC_DEBUG
69 #include <netkey/key_debug.h>
70 #else
71 #define KEYDEBUG(lev,arg)
72 #endif
73 #endif /*IPSEC*/
74
75 #ifdef FAST_IPSEC
76 #include <netipsec/ipsec.h>
77 #include <netipsec/xform.h>
78 #include <netipsec/key.h>
79 #endif /*FAST_IPSEC*/
80
/*
 * Debug helper: print the string x, then the IPv4 address stored in the
 * struct in_addr expression a (network byte order) as a dotted quad, then
 * the string y.
 *
 * Every argument is parenthesized so that expressions such as *ptr can be
 * passed for a.  Each octet is cast to int to match the %d conversions.
 * The expansion deliberately keeps its historical trailing semicolon so
 * that existing call sites keep compiling unchanged.
 */
#define	print_ip(x, a, y)	printf("%s %d.%d.%d.%d%s",		\
				    (x),				\
				    (int)((ntohl((a).s_addr) >> 24) & 0xFF), \
				    (int)((ntohl((a).s_addr) >> 16) & 0xFF), \
				    (int)((ntohl((a).s_addr) >> 8) & 0xFF), \
				    (int)(ntohl((a).s_addr) & 0xFF),	\
				    (y));
86
87 u_short ip_id; /* NOTE(review): not referenced in the visible code; ip_output() obtains IDs via ip_newid() */
88
89 #ifdef MBUF_STRESS_TEST
90 int mbuf_frag_size = 0; /* when non-zero, ip_output() passes larger packets through m_fragment() before if_output */
91 SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW,
92 &mbuf_frag_size, 0, "Fragment outgoing mbufs to this size");
93 #endif
94
95 static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *); /* forward declarations of file-local helpers */
96 static struct ifnet *ip_multicast_if(struct in_addr *, int *);
97 static void ip_mloopback
98 (struct ifnet *, struct mbuf *, struct sockaddr_in *, int); /* called by ip_output() as (ifp, m, dst, hlen) */
99 static int ip_getmoptions(struct inpcb *, struct sockopt *);
100 static int ip_pcbopts(struct inpcb *, int, struct mbuf *);
101 static int ip_setmoptions(struct inpcb *, struct sockopt *);
102 static struct ip_moptions *ip_findmoptions(struct inpcb *inp);
103
104 int ip_optcopy(struct ip *, struct ip *); /* not static; used by ip_fragment() below */
105
106
107 extern struct protosw inetsw[];
108
109 /*
110 * IP output. The packet in mbuf chain m contains a skeletal IP
111 * header (with len, off, ttl, proto, tos, src, dst).
112 * The mbuf chain containing the packet will be freed.
113 * The mbuf opt, if present, will not be freed.
114 * In the IP forwarding case, the packet will arrive with options already
115 * inserted, so must have a NULL opt pointer.
116 */
117 int
118 ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro,
119 int flags, struct ip_moptions *imo, struct inpcb *inp)
120 {
121 struct ip *ip;
122 struct ifnet *ifp = NULL; /* keep compiler happy */
123 struct mbuf *m0;
124 int hlen = sizeof (struct ip);
125 int len, error = 0;
126 struct sockaddr_in *dst = NULL; /* keep compiler happy */
127 struct in_ifaddr *ia = NULL;
128 int isbroadcast, sw_csum;
129 struct route iproute;
130 struct in_addr odst;
131 #ifdef IPFIREWALL_FORWARD
132 struct m_tag *fwd_tag = NULL;
133 #endif
134 #ifdef IPSEC
135 struct secpolicy *sp = NULL;
136 #endif
137 #ifdef FAST_IPSEC
138 struct secpolicy *sp = NULL;
139 struct tdb_ident *tdbi;
140 struct m_tag *mtag;
141 int s;
142 #endif /* FAST_IPSEC */
143
144 M_ASSERTPKTHDR(m);
145
146 if (ro == NULL) {
147 ro = &iproute;
148 bzero(ro, sizeof (*ro));
149 }
150
151 if (inp != NULL)
152 INP_LOCK_ASSERT(inp);
153
154 if (opt) {
155 len = 0;
156 m = ip_insertoptions(m, opt, &len);
157 if (len != 0)
158 hlen = len;
159 }
160 ip = mtod(m, struct ip *);
161
162 /*
163 * Fill in IP header. If we are not allowing fragmentation,
164 * then the ip_id field is meaningless, but we don't set it
165 * to zero. Doing so causes various problems when devices along
166 * the path (routers, load balancers, firewalls, etc.) illegally
167 * disable DF on our packet. Note that a 16-bit counter
168 * will wrap around in less than 10 seconds at 100 Mbit/s on a
169 * medium with MTU 1500. See Steven M. Bellovin, "A Technique
170 * for Counting NATted Hosts", Proc. IMW'02, available at
171 * <http://www.cs.columbia.edu/~smb/papers/fnat.pdf>.
172 */
173 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
174 ip->ip_v = IPVERSION;
175 ip->ip_hl = hlen >> 2;
176 ip->ip_id = ip_newid();
177 ipstat.ips_localout++;
178 } else {
179 hlen = ip->ip_hl << 2;
180 }
181
182 dst = (struct sockaddr_in *)&ro->ro_dst;
183 again:
184 /*
185 * If there is a cached route,
186 * check that it is to the same destination
187 * and is still up. If not, free it and try again.
188 * The address family should also be checked in case of sharing the
189 * cache with IPv6.
190 */
191 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
192 dst->sin_family != AF_INET ||
193 dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
194 RTFREE(ro->ro_rt);
195 ro->ro_rt = (struct rtentry *)0;
196 }
197 #ifdef IPFIREWALL_FORWARD
198 if (ro->ro_rt == NULL && fwd_tag == NULL) {
199 #else
200 if (ro->ro_rt == NULL) {
201 #endif
202 bzero(dst, sizeof(*dst));
203 dst->sin_family = AF_INET;
204 dst->sin_len = sizeof(*dst);
205 dst->sin_addr = ip->ip_dst;
206 }
207 /*
208 * If routing to interface only, short circuit routing lookup.
209 * The use of an all-ones broadcast address implies this; an
210 * interface is specified by the broadcast address of an interface,
211 * or the destination address of a ptp interface.
212 */
213 if (flags & IP_SENDONES) {
214 if ((ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst)))) == NULL &&
215 (ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL) {
216 ipstat.ips_noroute++;
217 error = ENETUNREACH;
218 goto bad;
219 }
220 ip->ip_dst.s_addr = INADDR_BROADCAST;
221 dst->sin_addr = ip->ip_dst;
222 ifp = ia->ia_ifp;
223 ip->ip_ttl = 1;
224 isbroadcast = 1;
225 } else if (flags & IP_ROUTETOIF) {
226 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL &&
227 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == NULL) {
228 ipstat.ips_noroute++;
229 error = ENETUNREACH;
230 goto bad;
231 }
232 ifp = ia->ia_ifp;
233 ip->ip_ttl = 1;
234 isbroadcast = in_broadcast(dst->sin_addr, ifp);
235 } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
236 imo != NULL && imo->imo_multicast_ifp != NULL) {
237 /*
238 * Bypass the normal routing lookup for multicast
239 * packets if the interface is specified.
240 */
241 ifp = imo->imo_multicast_ifp;
242 IFP_TO_IA(ifp, ia);
243 isbroadcast = 0; /* fool gcc */
244 } else {
245 /*
246 * We want to do any cloning requested by the link layer,
247 * as this is probably required in all cases for correct
248 * operation (as it is for ARP).
249 */
250 if (ro->ro_rt == NULL)
251 rtalloc_ign(ro, 0);
252 if (ro->ro_rt == NULL) {
253 ipstat.ips_noroute++;
254 error = EHOSTUNREACH;
255 goto bad;
256 }
257 ia = ifatoia(ro->ro_rt->rt_ifa);
258 ifp = ro->ro_rt->rt_ifp;
259 ro->ro_rt->rt_rmx.rmx_pksent++;
260 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
261 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
262 if (ro->ro_rt->rt_flags & RTF_HOST)
263 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
264 else
265 isbroadcast = in_broadcast(dst->sin_addr, ifp);
266 }
267 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
268 struct in_multi *inm;
269
270 m->m_flags |= M_MCAST;
271 /*
272 * IP destination address is multicast. Make sure "dst"
273 * still points to the address in "ro". (It may have been
274 * changed to point to a gateway address, above.)
275 */
276 dst = (struct sockaddr_in *)&ro->ro_dst;
277 /*
278 * See if the caller provided any multicast options
279 */
280 if (imo != NULL) {
281 ip->ip_ttl = imo->imo_multicast_ttl;
282 if (imo->imo_multicast_vif != -1)
283 ip->ip_src.s_addr =
284 ip_mcast_src ?
285 ip_mcast_src(imo->imo_multicast_vif) :
286 INADDR_ANY;
287 } else
288 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
289 /*
290 * Confirm that the outgoing interface supports multicast.
291 */
292 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
293 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
294 ipstat.ips_noroute++;
295 error = ENETUNREACH;
296 goto bad;
297 }
298 }
299 /*
300 * If source address not specified yet, use address
301 * of outgoing interface.
302 */
303 if (ip->ip_src.s_addr == INADDR_ANY) {
304 /* Interface may have no addresses. */
305 if (ia != NULL)
306 ip->ip_src = IA_SIN(ia)->sin_addr;
307 }
308
309 IN_MULTI_LOCK();
310 IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
311 if (inm != NULL &&
312 (imo == NULL || imo->imo_multicast_loop)) {
313 IN_MULTI_UNLOCK();
314 /*
315 * If we belong to the destination multicast group
316 * on the outgoing interface, and the caller did not
317 * forbid loopback, loop back a copy.
318 */
319 ip_mloopback(ifp, m, dst, hlen);
320 }
321 else {
322 IN_MULTI_UNLOCK();
323 /*
324 * If we are acting as a multicast router, perform
325 * multicast forwarding as if the packet had just
326 * arrived on the interface to which we are about
327 * to send. The multicast forwarding function
328 * recursively calls this function, using the
329 * IP_FORWARDING flag to prevent infinite recursion.
330 *
331 * Multicasts that are looped back by ip_mloopback(),
332 * above, will be forwarded by the ip_input() routine,
333 * if necessary.
334 */
335 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
336 /*
337 * If rsvp daemon is not running, do not
338 * set ip_moptions. This ensures that the packet
339 * is multicast and not just sent down one link
340 * as prescribed by rsvpd.
341 */
342 if (!rsvp_on)
343 imo = NULL;
344 if (ip_mforward &&
345 ip_mforward(ip, ifp, m, imo) != 0) {
346 m_freem(m);
347 goto done;
348 }
349 }
350 }
351
352 /*
353 * Multicasts with a time-to-live of zero may be looped-
354 * back, above, but must not be transmitted on a network.
355 * Also, multicasts addressed to the loopback interface
356 * are not sent -- the above call to ip_mloopback() will
357 * loop back a copy if this host actually belongs to the
358 * destination group on the loopback interface.
359 */
360 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
361 m_freem(m);
362 goto done;
363 }
364
365 goto sendit;
366 }
367 #ifndef notdef
368 /*
369 * If the source address is not specified yet, use the address
370 * of the outoing interface.
371 */
372 if (ip->ip_src.s_addr == INADDR_ANY) {
373 /* Interface may have no addresses. */
374 if (ia != NULL) {
375 ip->ip_src = IA_SIN(ia)->sin_addr;
376 }
377 }
378 #endif /* notdef */
379 /*
380 * Verify that we have any chance at all of being able to queue the
381 * packet or packet fragments, unless ALTQ is enabled on the given
382 * interface in which case packetdrop should be done by queueing.
383 */
384 #ifdef ALTQ
385 if ((!ALTQ_IS_ENABLED(&ifp->if_snd)) &&
386 ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
387 ifp->if_snd.ifq_maxlen))
388 #else
389 if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
390 ifp->if_snd.ifq_maxlen)
391 #endif /* ALTQ */
392 {
393 error = ENOBUFS;
394 ipstat.ips_odropped++;
395 ifp->if_snd.ifq_drops += (ip->ip_len / ifp->if_mtu + 1);
396 goto bad;
397 }
398
399 /*
400 * Look for broadcast address and
401 * verify user is allowed to send
402 * such a packet.
403 */
404 if (isbroadcast) {
405 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
406 error = EADDRNOTAVAIL;
407 goto bad;
408 }
409 if ((flags & IP_ALLOWBROADCAST) == 0) {
410 error = EACCES;
411 goto bad;
412 }
413 /* don't allow broadcast messages to be fragmented */
414 if (ip->ip_len > ifp->if_mtu) {
415 error = EMSGSIZE;
416 goto bad;
417 }
418 m->m_flags |= M_BCAST;
419 } else {
420 m->m_flags &= ~M_BCAST;
421 }
422
423 sendit:
424 #ifdef IPSEC
425 /* get SP for this packet */
426 if (inp == NULL)
427 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND,
428 flags, &error);
429 else
430 sp = ipsec4_getpolicybypcb(m, IPSEC_DIR_OUTBOUND, inp, &error);
431
432 if (sp == NULL) {
433 ipsecstat.out_inval++;
434 goto bad;
435 }
436
437 error = 0;
438
439 /* check policy */
440 switch (sp->policy) {
441 case IPSEC_POLICY_DISCARD:
442 /*
443 * This packet is just discarded.
444 */
445 ipsecstat.out_polvio++;
446 goto bad;
447
448 case IPSEC_POLICY_BYPASS:
449 case IPSEC_POLICY_NONE:
450 case IPSEC_POLICY_TCP:
451 /* no need to do IPsec. */
452 goto skip_ipsec;
453
454 case IPSEC_POLICY_IPSEC:
455 if (sp->req == NULL) {
456 /* acquire a policy */
457 error = key_spdacquire(sp);
458 goto bad;
459 }
460 break;
461
462 case IPSEC_POLICY_ENTRUST:
463 default:
464 printf("ip_output: Invalid policy found. %d\n", sp->policy);
465 }
466 {
467 struct ipsec_output_state state;
468 bzero(&state, sizeof(state));
469 state.m = m;
470 if (flags & IP_ROUTETOIF) {
471 state.ro = &iproute;
472 bzero(&iproute, sizeof(iproute));
473 } else
474 state.ro = ro;
475 state.dst = (struct sockaddr *)dst;
476
477 ip->ip_sum = 0;
478
479 /*
480 * XXX
481 * delayed checksums are not currently compatible with IPsec
482 */
483 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
484 in_delayed_cksum(m);
485 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
486 }
487
488 ip->ip_len = htons(ip->ip_len);
489 ip->ip_off = htons(ip->ip_off);
490
491 error = ipsec4_output(&state, sp, flags);
492
493 m = state.m;
494 if (flags & IP_ROUTETOIF) {
495 /*
496 * if we have tunnel mode SA, we may need to ignore
497 * IP_ROUTETOIF.
498 */
499 if (state.ro != &iproute || state.ro->ro_rt != NULL) {
500 flags &= ~IP_ROUTETOIF;
501 ro = state.ro;
502 }
503 } else
504 ro = state.ro;
505 dst = (struct sockaddr_in *)state.dst;
506 if (error) {
507 /* mbuf is already reclaimed in ipsec4_output. */
508 m = NULL;
509 switch (error) {
510 case EHOSTUNREACH:
511 case ENETUNREACH:
512 case EMSGSIZE:
513 case ENOBUFS:
514 case ENOMEM:
515 break;
516 default:
517 printf("ip4_output (ipsec): error code %d\n", error);
518 /*fall through*/
519 case ENOENT:
520 /* don't show these error codes to the user */
521 error = 0;
522 break;
523 }
524 goto bad;
525 }
526
527 /* be sure to update variables that are affected by ipsec4_output() */
528 ip = mtod(m, struct ip *);
529 hlen = ip->ip_hl << 2;
530 if (ro->ro_rt == NULL) {
531 if ((flags & IP_ROUTETOIF) == 0) {
532 printf("ip_output: "
533 "can't update route after IPsec processing\n");
534 error = EHOSTUNREACH; /*XXX*/
535 goto bad;
536 }
537 } else {
538 if (state.encap) {
539 ia = ifatoia(ro->ro_rt->rt_ifa);
540 ifp = ro->ro_rt->rt_ifp;
541 }
542 }
543 }
544
545 /* make it flipped, again. */
546 ip->ip_len = ntohs(ip->ip_len);
547 ip->ip_off = ntohs(ip->ip_off);
548 skip_ipsec:
549 #endif /*IPSEC*/
550 #ifdef FAST_IPSEC
551 /*
552 * Check the security policy (SP) for the packet and, if
553 * required, do IPsec-related processing. There are two
554 * cases here; the first time a packet is sent through
555 * it will be untagged and handled by ipsec4_checkpolicy.
556 * If the packet is resubmitted to ip_output (e.g. after
557 * AH, ESP, etc. processing), there will be a tag to bypass
558 * the lookup and related policy checking.
559 */
560 mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL);
561 s = splnet();
562 if (mtag != NULL) {
563 tdbi = (struct tdb_ident *)(mtag + 1);
564 sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND);
565 if (sp == NULL)
566 error = -EINVAL; /* force silent drop */
567 m_tag_delete(m, mtag);
568 } else {
569 sp = ipsec4_checkpolicy(m, IPSEC_DIR_OUTBOUND, flags,
570 &error, inp);
571 }
572 /*
573 * There are four return cases:
574 * sp != NULL apply IPsec policy
575 * sp == NULL, error == 0 no IPsec handling needed
576 * sp == NULL, error == -EINVAL discard packet w/o error
577 * sp == NULL, error != 0 discard packet, report error
578 */
579 if (sp != NULL) {
580 /* Loop detection, check if ipsec processing already done */
581 KASSERT(sp->req != NULL, ("ip_output: no ipsec request"));
582 for (mtag = m_tag_first(m); mtag != NULL;
583 mtag = m_tag_next(m, mtag)) {
584 if (mtag->m_tag_cookie != MTAG_ABI_COMPAT)
585 continue;
586 if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE &&
587 mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED)
588 continue;
589 /*
590 * Check if policy has an SA associated with it.
591 * This can happen when an SP has yet to acquire
592 * an SA; e.g. on first reference. If it occurs,
593 * then we let ipsec4_process_packet do its thing.
594 */
595 if (sp->req->sav == NULL)
596 break;
597 tdbi = (struct tdb_ident *)(mtag + 1);
598 if (tdbi->spi == sp->req->sav->spi &&
599 tdbi->proto == sp->req->sav->sah->saidx.proto &&
600 bcmp(&tdbi->dst, &sp->req->sav->sah->saidx.dst,
601 sizeof (union sockaddr_union)) == 0) {
602 /*
603 * No IPsec processing is needed, free
604 * reference to SP.
605 *
606 * NB: null pointer to avoid free at
607 * done: below.
608 */
609 KEY_FREESP(&sp), sp = NULL;
610 splx(s);
611 goto spd_done;
612 }
613 }
614
615 /*
616 * Do delayed checksums now because we send before
617 * this is done in the normal processing path.
618 */
619 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
620 in_delayed_cksum(m);
621 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
622 }
623
624 ip->ip_len = htons(ip->ip_len);
625 ip->ip_off = htons(ip->ip_off);
626
627 /* NB: callee frees mbuf */
628 error = ipsec4_process_packet(m, sp->req, flags, 0);
629 /*
630 * Preserve KAME behaviour: ENOENT can be returned
631 * when an SA acquire is in progress. Don't propagate
632 * this to user-level; it confuses applications.
633 *
634 * XXX this will go away when the SADB is redone.
635 */
636 if (error == ENOENT)
637 error = 0;
638 splx(s);
639 goto done;
640 } else {
641 splx(s);
642
643 if (error != 0) {
644 /*
645 * Hack: -EINVAL is used to signal that a packet
646 * should be silently discarded. This is typically
647 * because we asked key management for an SA and
648 * it was delayed (e.g. kicked up to IKE).
649 */
650 if (error == -EINVAL)
651 error = 0;
652 goto bad;
653 } else {
654 /* No IPsec processing for this packet. */
655 }
656 #ifdef notyet
657 /*
658 * If deferred crypto processing is needed, check that
659 * the interface supports it.
660 */
661 mtag = m_tag_find(m, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL);
662 if (mtag != NULL && (ifp->if_capenable & IFCAP_IPSEC) == 0) {
663 /* notify IPsec to do its own crypto */
664 ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
665 error = EHOSTUNREACH;
666 goto bad;
667 }
668 #endif
669 }
670 spd_done:
671 #endif /* FAST_IPSEC */
672
673 /* Jump over all PFIL processing if hooks are not active. */
674 if (inet_pfil_hook.ph_busy_count == -1)
675 goto passout;
676
677 /* Run through list of hooks for output packets. */
678 odst.s_addr = ip->ip_dst.s_addr;
679 error = pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT, inp);
680 if (error != 0 || m == NULL)
681 goto done;
682
683 ip = mtod(m, struct ip *);
684
685 /* See if destination IP address was changed by packet filter. */
686 if (odst.s_addr != ip->ip_dst.s_addr) {
687 m->m_flags |= M_SKIP_FIREWALL;
688 /* If destination is now ourself drop to ip_input(). */
689 if (in_localip(ip->ip_dst)) {
690 m->m_flags |= M_FASTFWD_OURS;
691 if (m->m_pkthdr.rcvif == NULL)
692 m->m_pkthdr.rcvif = loif;
693 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
694 m->m_pkthdr.csum_flags |=
695 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
696 m->m_pkthdr.csum_data = 0xffff;
697 }
698 m->m_pkthdr.csum_flags |=
699 CSUM_IP_CHECKED | CSUM_IP_VALID;
700
701 error = netisr_queue(NETISR_IP, m);
702 goto done;
703 } else
704 goto again; /* Redo the routing table lookup. */
705 }
706
707 #ifdef IPFIREWALL_FORWARD
708 /* See if local, if yes, send it to netisr with IP_FASTFWD_OURS. */
709 if (m->m_flags & M_FASTFWD_OURS) {
710 if (m->m_pkthdr.rcvif == NULL)
711 m->m_pkthdr.rcvif = loif;
712 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
713 m->m_pkthdr.csum_flags |=
714 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
715 m->m_pkthdr.csum_data = 0xffff;
716 }
717 m->m_pkthdr.csum_flags |=
718 CSUM_IP_CHECKED | CSUM_IP_VALID;
719
720 error = netisr_queue(NETISR_IP, m);
721 goto done;
722 }
723 /* Or forward to some other address? */
724 fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
725 if (fwd_tag) {
726 dst = (struct sockaddr_in *)&ro->ro_dst;
727 bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in));
728 m->m_flags |= M_SKIP_FIREWALL;
729 m_tag_delete(m, fwd_tag);
730 goto again;
731 }
732 #endif /* IPFIREWALL_FORWARD */
733
734 passout:
735 /* 127/8 must not appear on wire - RFC1122. */
736 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
737 (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
738 if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
739 ipstat.ips_badaddr++;
740 error = EADDRNOTAVAIL;
741 goto bad;
742 }
743 }
744
745 m->m_pkthdr.csum_flags |= CSUM_IP;
746 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist;
747 if (sw_csum & CSUM_DELAY_DATA) {
748 in_delayed_cksum(m);
749 sw_csum &= ~CSUM_DELAY_DATA;
750 }
751 m->m_pkthdr.csum_flags &= ifp->if_hwassist;
752
753 /*
754 * If small enough for interface, or the interface will take
755 * care of the fragmentation for us, can just send directly.
756 */
757 if (ip->ip_len <= ifp->if_mtu || (ifp->if_hwassist & CSUM_FRAGMENT &&
758 ((ip->ip_off & IP_DF) == 0))) {
759 ip->ip_len = htons(ip->ip_len);
760 ip->ip_off = htons(ip->ip_off);
761 ip->ip_sum = 0;
762 if (sw_csum & CSUM_DELAY_IP)
763 ip->ip_sum = in_cksum(m, hlen);
764
765 /* Record statistics for this interface address. */
766 if (!(flags & IP_FORWARDING) && ia) {
767 ia->ia_ifa.if_opackets++;
768 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
769 }
770
771 #ifdef IPSEC
772 /* clean ipsec history once it goes out of the node */
773 ipsec_delaux(m);
774 #endif
775
776 #ifdef MBUF_STRESS_TEST
777 if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size)
778 m = m_fragment(m, M_DONTWAIT, mbuf_frag_size);
779 #endif
780 error = (*ifp->if_output)(ifp, m,
781 (struct sockaddr *)dst, ro->ro_rt);
782 goto done;
783 }
784
785 if (ip->ip_off & IP_DF) {
786 error = EMSGSIZE;
787 /*
788 * This case can happen if the user changed the MTU
789 * of an interface after enabling IP on it. Because
790 * most netifs don't keep track of routes pointing to
791 * them, there is no way for one to update all its
792 * routes when the MTU is changed.
793 */
794 if (ro != NULL &&
795 (ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) &&
796 (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
797 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
798 }
799 ipstat.ips_cantfrag++;
800 goto bad;
801 }
802
803 /*
804 * Too large for interface; fragment if possible. If successful,
805 * on return, m will point to a list of packets to be sent.
806 */
807 error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist, sw_csum);
808 if (error)
809 goto bad;
810 for (; m; m = m0) {
811 m0 = m->m_nextpkt;
812 m->m_nextpkt = 0;
813 #ifdef IPSEC
814 /* clean ipsec history once it goes out of the node */
815 ipsec_delaux(m);
816 #endif
817 if (error == 0) {
818 /* Record statistics for this interface address. */
819 if (ia != NULL) {
820 ia->ia_ifa.if_opackets++;
821 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
822 }
823
824 error = (*ifp->if_output)(ifp, m,
825 (struct sockaddr *)dst, ro->ro_rt);
826 } else
827 m_freem(m);
828 }
829
830 if (error == 0)
831 ipstat.ips_fragmented++;
832
833 done:
834 if (ro == &iproute && ro->ro_rt) {
835 RTFREE(ro->ro_rt);
836 }
837 #ifdef IPSEC
838 if (sp != NULL) {
839 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
840 printf("DP ip_output call free SP:%p\n", sp));
841 key_freesp(sp);
842 }
843 #endif
844 #ifdef FAST_IPSEC
845 if (sp != NULL)
846 KEY_FREESP(&sp);
847 #endif
848 return (error);
849 bad:
850 m_freem(m);
851 goto done;
852 }
853
854 /*
855 * Create a chain of fragments which fit the given mtu. m_frag points to the
856 * mbuf to be fragmented; on return it points to the chain with the fragments.
857 * Return 0 if no error. If error, m_frag may contain a partially built
858 * chain of fragments that should be freed by the caller.
859 *
860 * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist)
861 * sw_csum contains the delayed checksums flags (e.g., CSUM_DELAY_IP).
862 */
863 int
864 ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
865 u_long if_hwassist_flags, int sw_csum)
866 {
867 int error = 0;
868 int hlen = ip->ip_hl << 2;
869 int len = (mtu - hlen) & ~7; /* size of payload in each fragment */
870 int off;
871 struct mbuf *m0 = *m_frag; /* the original packet */
872 int firstlen;
873 struct mbuf **mnext;
874 int nfrags;
875
876 if (ip->ip_off & IP_DF) { /* Fragmentation not allowed */
877 ipstat.ips_cantfrag++;
878 return EMSGSIZE;
879 }
880
881 /*
882 * Must be able to put at least 8 bytes per fragment.
883 */
884 if (len < 8)
885 return EMSGSIZE;
886
887 /*
888 * If the interface will not calculate checksums on
889 * fragmented packets, then do it here.
890 */
891 if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
892 (if_hwassist_flags & CSUM_IP_FRAGS) == 0) {
893 in_delayed_cksum(m0);
894 m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
895 }
896
897 if (len > PAGE_SIZE) {
898 /*
899 * Fragment large datagrams such that each segment
900 * contains a multiple of PAGE_SIZE amount of data,
901 * plus headers. This enables a receiver to perform
902 * page-flipping zero-copy optimizations.
903 *
904 * XXX When does this help given that sender and receiver
905 * could have different page sizes, and also mtu could
906 * be less than the receiver's page size ?
907 */
908 int newlen;
909 struct mbuf *m;
910
911 for (m = m0, off = 0; m && (off+m->m_len) <= mtu; m = m->m_next)
912 off += m->m_len;
913
914 /*
915 * firstlen (off - hlen) must be aligned on an
916 * 8-byte boundary
917 */
918 if (off < hlen)
919 goto smart_frag_failure;
920 off = ((off - hlen) & ~7) + hlen;
921 newlen = (~PAGE_MASK) & mtu;
922 if ((newlen + sizeof (struct ip)) > mtu) {
923 /* we failed, go back the default */
924 smart_frag_failure:
925 newlen = len;
926 off = hlen + len;
927 }
928 len = newlen;
929
930 } else {
931 off = hlen + len;
932 }
933
934 firstlen = off - hlen;
935 mnext = &m0->m_nextpkt; /* pointer to next packet */
936
937 /*
938 * Loop through length of segment after first fragment,
939 * make new header and copy data of each part and link onto chain.
940 * Here, m0 is the original packet, m is the fragment being created.
941 * The fragments are linked off the m_nextpkt of the original
942 * packet, which after processing serves as the first fragment.
943 */
944 for (nfrags = 1; off < ip->ip_len; off += len, nfrags++) {
945 struct ip *mhip; /* ip header on the fragment */
946 struct mbuf *m;
947 int mhlen = sizeof (struct ip);
948
949 MGETHDR(m, M_DONTWAIT, MT_HEADER);
950 if (m == NULL) {
951 error = ENOBUFS;
952 ipstat.ips_odropped++;
953 goto done;
954 }
955 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
956 /*
957 * In the first mbuf, leave room for the link header, then
958 * copy the original IP header including options. The payload
959 * goes into an additional mbuf chain returned by m_copy().
960 */
961 m->m_data += max_linkhdr;
962 mhip = mtod(m, struct ip *);
963 *mhip = *ip;
964 if (hlen > sizeof (struct ip)) {
965 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
966 mhip->ip_v = IPVERSION;
967 mhip->ip_hl = mhlen >> 2;
968 }
969 m->m_len = mhlen;
970 /* XXX do we need to add ip->ip_off below ? */
971 mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off;
972 if (off + len >= ip->ip_len) { /* last fragment */
973 len = ip->ip_len - off;
974 m->m_flags |= M_LASTFRAG;
975 } else
976 mhip->ip_off |= IP_MF;
977 mhip->ip_len = htons((u_short)(len + mhlen));
978 m->m_next = m_copy(m0, off, len);
979 if (m->m_next == NULL) { /* copy failed */
980 m_free(m);
981 error = ENOBUFS; /* ??? */
982 ipstat.ips_odropped++;
983 goto done;
984 }
985 m->m_pkthdr.len = mhlen + len;
986 m->m_pkthdr.rcvif = NULL;
987 #ifdef MAC
988 mac_create_fragment(m0, m);
989 #endif
990 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
991 mhip->ip_off = htons(mhip->ip_off);
992 mhip->ip_sum = 0;
993 if (sw_csum & CSUM_DELAY_IP)
994 mhip->ip_sum = in_cksum(m, mhlen);
995 *mnext = m;
996 mnext = &m->m_nextpkt;
997 }
998 ipstat.ips_ofragments += nfrags;
999
1000 /* set first marker for fragment chain */
1001 m0->m_flags |= M_FIRSTFRAG | M_FRAG;
1002 m0->m_pkthdr.csum_data = nfrags;
1003
1004 /*
1005 * Update first fragment by trimming what's been copied out
1006 * and updating header.
1007 */
1008 m_adj(m0, hlen + firstlen - ip->ip_len);
1009 m0->m_pkthdr.len = hlen + firstlen;
1010 ip->ip_len = htons((u_short)m0->m_pkthdr.len);
1011 ip->ip_off |= IP_MF;
1012 ip->ip_off = htons(ip->ip_off);
1013 ip->ip_sum = 0;
1014 if (sw_csum & CSUM_DELAY_IP)
1015 ip->ip_sum = in_cksum(m0, hlen);
1016
1017 done:
1018 *m_frag = m0;
1019 return error;
1020 }
1021
1022 void
1023 in_delayed_cksum(struct mbuf *m)
1024 {
1025 struct ip *ip;
1026 u_short csum, offset;
1027
1028 ip = mtod(m, struct ip *);
1029 offset = ip->ip_hl << 2 ;
1030 csum = in_cksum_skip(m, ip->ip_len, offset);
1031 if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
1032 csum = 0xffff;
1033 offset += m->m_pkthdr.csum_data; /* checksum offset */
1034
1035 if (offset + sizeof(u_short) > m->m_len) {
1036 printf("delayed m_pullup, m->len: %d off: %d p: %d\n",
1037 m->m_len, offset, ip->ip_p);
1038 /*
1039 * XXX
1040 * this shouldn't happen, but if it does, the
1041 * correct behavior may be to insert the checksum
1042 * in the appropriate next mbuf in the chain.
1043 */
1044 return;
1045 }
1046 *(u_short *)(m->m_data + offset) = csum;
1047 }
1048
1049 /*
1050 * Insert IP options into preformed packet.
1051 * Adjust IP destination as required for IP source routing,
1052 * as indicated by a non-zero in_addr at the start of the options.
1053 *
1054 * XXX This routine assumes that the packet has no options in place.
1055 */
1056 static struct mbuf *
1057 ip_insertoptions(m, opt, phlen)
1058 register struct mbuf *m;
1059 struct mbuf *opt;
1060 int *phlen;
1061 {
1062 register struct ipoption *p = mtod(opt, struct ipoption *);
1063 struct mbuf *n;
1064 register struct ip *ip = mtod(m, struct ip *);
1065 unsigned optlen;
1066
1067 optlen = opt->m_len - sizeof(p->ipopt_dst);
1068 if (optlen + ip->ip_len > IP_MAXPACKET) {
1069 *phlen = 0;
1070 return (m); /* XXX should fail */
1071 }
1072 if (p->ipopt_dst.s_addr)
1073 ip->ip_dst = p->ipopt_dst;
1074 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
1075 MGETHDR(n, M_DONTWAIT, MT_HEADER);
1076 if (n == NULL) {
1077 *phlen = 0;
1078 return (m);
1079 }
1080 M_MOVE_PKTHDR(n, m);
1081 n->m_pkthdr.rcvif = NULL;
1082 #ifdef MAC
1083 mac_copy_mbuf(m, n);
1084 #endif
1085 n->m_pkthdr.len += optlen;
1086 m->m_len -= sizeof(struct ip);
1087 m->m_data += sizeof(struct ip);
1088 n->m_next = m;
1089 m = n;
1090 m->m_len = optlen + sizeof(struct ip);
1091 m->m_data += max_linkhdr;
1092 bcopy(ip, mtod(m, void *), sizeof(struct ip));
1093 } else {
1094 m->m_data -= optlen;
1095 m->m_len += optlen;
1096 m->m_pkthdr.len += optlen;
1097 bcopy(ip, mtod(m, void *), sizeof(struct ip));
1098 }
1099 ip = mtod(m, struct ip *);
1100 bcopy(p->ipopt_list, ip + 1, optlen);
1101 *phlen = sizeof(struct ip) + optlen;
1102 ip->ip_v = IPVERSION;
1103 ip->ip_hl = *phlen >> 2;
1104 ip->ip_len += optlen;
1105 return (m);
1106 }
1107
1108 /*
1109 * Copy options from ip to jp,
1110 * omitting those not copied during fragmentation.
1111 */
1112 int
1113 ip_optcopy(ip, jp)
1114 struct ip *ip, *jp;
1115 {
1116 register u_char *cp, *dp;
1117 int opt, optlen, cnt;
1118
1119 cp = (u_char *)(ip + 1);
1120 dp = (u_char *)(jp + 1);
1121 cnt = (ip->ip_hl << 2) - sizeof (struct ip);
1122 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1123 opt = cp[0];
1124 if (opt == IPOPT_EOL)
1125 break;
1126 if (opt == IPOPT_NOP) {
1127 /* Preserve for IP mcast tunnel's LSRR alignment. */
1128 *dp++ = IPOPT_NOP;
1129 optlen = 1;
1130 continue;
1131 }
1132
1133 KASSERT(cnt >= IPOPT_OLEN + sizeof(*cp),
1134 ("ip_optcopy: malformed ipv4 option"));
1135 optlen = cp[IPOPT_OLEN];
1136 KASSERT(optlen >= IPOPT_OLEN + sizeof(*cp) && optlen <= cnt,
1137 ("ip_optcopy: malformed ipv4 option"));
1138
1139 /* bogus lengths should have been caught by ip_dooptions */
1140 if (optlen > cnt)
1141 optlen = cnt;
1142 if (IPOPT_COPIED(opt)) {
1143 bcopy(cp, dp, optlen);
1144 dp += optlen;
1145 }
1146 }
1147 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
1148 *dp++ = IPOPT_EOL;
1149 return (optlen);
1150 }
1151
1152 /*
1153 * IP socket option processing.
1154 *
1155 * There are two versions of this call in order to work around a race
1156 * condition in TCP in FreeBSD 6.x. In the TCP implementation, so->so_pcb
1157 * can become NULL if the pcb or pcbinfo lock isn't held. However, when
1158 * entering ip_ctloutput(), neither lock is held, and finding the pointer to
1159 * either lock requires follow so->so_pcb, which may be NULL.
1160 * ip_ctloutput_pcbinfo() accepts the pcbinfo pointer so that the lock can be
1161 * safely acquired. This is not required in FreeBSD 7.x because the
1162 * invariants on so->so_pcb are much stronger, so it cannot become NULL
1163 * while the socket is in use.
1164 */
1165 int
1166 ip_ctloutput_pcbinfo(so, sopt, pcbinfo)
1167 struct socket *so;
1168 struct sockopt *sopt;
1169 struct inpcbinfo *pcbinfo;
1170 {
1171 struct inpcb *inp = sotoinpcb(so);
1172 int error, optval;
1173
1174 if (pcbinfo == NULL)
1175 pcbinfo = inp->inp_pcbinfo;
1176
1177 error = optval = 0;
1178 if (sopt->sopt_level != IPPROTO_IP) {
1179 return (EINVAL);
1180 }
1181
1182 if (inp == NULL)
1183 return (EINVAL);
1184
1185 switch (sopt->sopt_dir) {
1186 case SOPT_SET:
1187 switch (sopt->sopt_name) {
1188 case IP_OPTIONS:
1189 #ifdef notyet
1190 case IP_RETOPTS:
1191 #endif
1192 {
1193 struct mbuf *m;
1194 if (sopt->sopt_valsize > MLEN) {
1195 error = EMSGSIZE;
1196 break;
1197 }
1198 MGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT, MT_DATA);
1199 if (m == NULL) {
1200 error = ENOBUFS;
1201 break;
1202 }
1203 m->m_len = sopt->sopt_valsize;
1204 error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
1205 m->m_len);
1206 if (error) {
1207 m_free(m);
1208 break;
1209 }
1210 INP_INFO_WLOCK(pcbinfo);
1211 if (so->so_pcb == NULL) {
1212 INP_INFO_WUNLOCK(pcbinfo);
1213 m_free(m);
1214 error = EINVAL;
1215 break;
1216 }
1217 INP_LOCK(inp);
1218 INP_INFO_WUNLOCK(pcbinfo);
1219 error = ip_pcbopts(inp, sopt->sopt_name, m);
1220 INP_UNLOCK(inp);
1221 return (error);
1222 }
1223
1224 case IP_TOS:
1225 case IP_TTL:
1226 case IP_MINTTL:
1227 case IP_RECVOPTS:
1228 case IP_RECVRETOPTS:
1229 case IP_RECVDSTADDR:
1230 case IP_RECVTTL:
1231 case IP_RECVIF:
1232 case IP_FAITH:
1233 case IP_ONESBCAST:
1234 case IP_DONTFRAG:
1235 error = sooptcopyin(sopt, &optval, sizeof optval,
1236 sizeof optval);
1237 if (error)
1238 break;
1239
1240 INP_INFO_WLOCK(pcbinfo);
1241 if (so->so_pcb == NULL) {
1242 INP_INFO_WUNLOCK(pcbinfo);
1243 error = EINVAL;
1244 break;
1245 }
1246 INP_LOCK(inp);
1247 INP_INFO_WUNLOCK(pcbinfo);
1248 switch (sopt->sopt_name) {
1249 case IP_TOS:
1250 inp->inp_ip_tos = optval;
1251 break;
1252
1253 case IP_TTL:
1254 inp->inp_ip_ttl = optval;
1255 break;
1256
1257 case IP_MINTTL:
1258 if (optval > 0 && optval <= MAXTTL)
1259 inp->inp_ip_minttl = optval;
1260 else
1261 error = EINVAL;
1262 break;
1263
1264 #define OPTSET(bit) do { \
1265 INP_LOCK(inp); \
1266 if (optval) \
1267 inp->inp_flags |= bit; \
1268 else \
1269 inp->inp_flags &= ~bit; \
1270 INP_UNLOCK(inp); \
1271 } while (0)
1272
1273 case IP_RECVOPTS:
1274 OPTSET(INP_RECVOPTS);
1275 break;
1276
1277 case IP_RECVRETOPTS:
1278 OPTSET(INP_RECVRETOPTS);
1279 break;
1280
1281 case IP_RECVDSTADDR:
1282 OPTSET(INP_RECVDSTADDR);
1283 break;
1284
1285 case IP_RECVTTL:
1286 OPTSET(INP_RECVTTL);
1287 break;
1288
1289 case IP_RECVIF:
1290 OPTSET(INP_RECVIF);
1291 break;
1292
1293 case IP_FAITH:
1294 OPTSET(INP_FAITH);
1295 break;
1296
1297 case IP_ONESBCAST:
1298 OPTSET(INP_ONESBCAST);
1299 break;
1300 case IP_DONTFRAG:
1301 OPTSET(INP_DONTFRAG);
1302 break;
1303 }
1304 INP_UNLOCK(inp);
1305 break;
1306 #undef OPTSET
1307
1308 case IP_MULTICAST_IF:
1309 case IP_MULTICAST_VIF:
1310 case IP_MULTICAST_TTL:
1311 case IP_MULTICAST_LOOP:
1312 case IP_ADD_MEMBERSHIP:
1313 case IP_DROP_MEMBERSHIP:
1314 error = ip_setmoptions(inp, sopt);
1315 break;
1316
1317 case IP_PORTRANGE:
1318 error = sooptcopyin(sopt, &optval, sizeof optval,
1319 sizeof optval);
1320 if (error)
1321 break;
1322
1323 INP_INFO_WLOCK(pcbinfo);
1324 if (so->so_pcb == NULL) {
1325 error = EINVAL;
1326 break;
1327 }
1328 INP_LOCK(inp);
1329 INP_INFO_WUNLOCK(pcbinfo);
1330 switch (optval) {
1331 case IP_PORTRANGE_DEFAULT:
1332 inp->inp_flags &= ~(INP_LOWPORT);
1333 inp->inp_flags &= ~(INP_HIGHPORT);
1334 break;
1335
1336 case IP_PORTRANGE_HIGH:
1337 inp->inp_flags &= ~(INP_LOWPORT);
1338 inp->inp_flags |= INP_HIGHPORT;
1339 break;
1340
1341 case IP_PORTRANGE_LOW:
1342 inp->inp_flags &= ~(INP_HIGHPORT);
1343 inp->inp_flags |= INP_LOWPORT;
1344 break;
1345
1346 default:
1347 error = EINVAL;
1348 break;
1349 }
1350 INP_UNLOCK(inp);
1351 break;
1352
1353 #if defined(IPSEC) || defined(FAST_IPSEC)
1354 case IP_IPSEC_POLICY:
1355 {
1356 caddr_t req;
1357 size_t len = 0;
1358 int priv;
1359 struct mbuf *m;
1360 int optname;
1361
1362 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1363 break;
1364 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1365 break;
1366 priv = (sopt->sopt_td != NULL &&
1367 suser(sopt->sopt_td) != 0) ? 0 : 1;
1368 req = mtod(m, caddr_t);
1369 len = m->m_len;
1370 optname = sopt->sopt_name;
1371 if (so->so_pcb == NULL) {
1372 m_free(m);
1373 error = EINVAL;
1374 break;
1375 }
1376 error = ipsec4_set_policy(inp, optname, req, len, priv);
1377 m_freem(m);
1378 break;
1379 }
1380 #endif /*IPSEC*/
1381
1382 default:
1383 error = ENOPROTOOPT;
1384 break;
1385 }
1386 break;
1387
1388 case SOPT_GET:
1389 switch (sopt->sopt_name) {
1390 case IP_OPTIONS:
1391 case IP_RETOPTS:
1392 if (inp->inp_options)
1393 error = sooptcopyout(sopt,
1394 mtod(inp->inp_options,
1395 char *),
1396 inp->inp_options->m_len);
1397 else
1398 sopt->sopt_valsize = 0;
1399 break;
1400
1401 case IP_TOS:
1402 case IP_TTL:
1403 case IP_MINTTL:
1404 case IP_RECVOPTS:
1405 case IP_RECVRETOPTS:
1406 case IP_RECVDSTADDR:
1407 case IP_RECVTTL:
1408 case IP_RECVIF:
1409 case IP_PORTRANGE:
1410 case IP_FAITH:
1411 case IP_ONESBCAST:
1412 case IP_DONTFRAG:
1413 switch (sopt->sopt_name) {
1414
1415 case IP_TOS:
1416 optval = inp->inp_ip_tos;
1417 break;
1418
1419 case IP_TTL:
1420 optval = inp->inp_ip_ttl;
1421 break;
1422
1423 case IP_MINTTL:
1424 optval = inp->inp_ip_minttl;
1425 break;
1426
1427 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
1428
1429 case IP_RECVOPTS:
1430 optval = OPTBIT(INP_RECVOPTS);
1431 break;
1432
1433 case IP_RECVRETOPTS:
1434 optval = OPTBIT(INP_RECVRETOPTS);
1435 break;
1436
1437 case IP_RECVDSTADDR:
1438 optval = OPTBIT(INP_RECVDSTADDR);
1439 break;
1440
1441 case IP_RECVTTL:
1442 optval = OPTBIT(INP_RECVTTL);
1443 break;
1444
1445 case IP_RECVIF:
1446 optval = OPTBIT(INP_RECVIF);
1447 break;
1448
1449 case IP_PORTRANGE:
1450 if (inp->inp_flags & INP_HIGHPORT)
1451 optval = IP_PORTRANGE_HIGH;
1452 else if (inp->inp_flags & INP_LOWPORT)
1453 optval = IP_PORTRANGE_LOW;
1454 else
1455 optval = 0;
1456 break;
1457
1458 case IP_FAITH:
1459 optval = OPTBIT(INP_FAITH);
1460 break;
1461
1462 case IP_ONESBCAST:
1463 optval = OPTBIT(INP_ONESBCAST);
1464 break;
1465 case IP_DONTFRAG:
1466 optval = OPTBIT(INP_DONTFRAG);
1467 break;
1468 }
1469 error = sooptcopyout(sopt, &optval, sizeof optval);
1470 break;
1471
1472 case IP_MULTICAST_IF:
1473 case IP_MULTICAST_VIF:
1474 case IP_MULTICAST_TTL:
1475 case IP_MULTICAST_LOOP:
1476 case IP_ADD_MEMBERSHIP:
1477 case IP_DROP_MEMBERSHIP:
1478 error = ip_getmoptions(inp, sopt);
1479 break;
1480
1481 #if defined(IPSEC) || defined(FAST_IPSEC)
1482 case IP_IPSEC_POLICY:
1483 {
1484 struct mbuf *m = NULL;
1485 caddr_t req = NULL;
1486 size_t len = 0;
1487
1488 if (m != 0) {
1489 req = mtod(m, caddr_t);
1490 len = m->m_len;
1491 }
1492 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m);
1493 if (error == 0)
1494 error = soopt_mcopyout(sopt, m); /* XXX */
1495 if (error == 0)
1496 m_freem(m);
1497 break;
1498 }
1499 #endif /*IPSEC*/
1500
1501 default:
1502 error = ENOPROTOOPT;
1503 break;
1504 }
1505 break;
1506 }
1507 return (error);
1508 }
1509
1510 int
1511 ip_ctloutput(so, sopt)
1512 struct socket *so;
1513 struct sockopt *sopt;
1514 {
1515
1516 return (ip_ctloutput_pcbinfo(so, sopt, NULL));
1517 }
1518
1519 /*
1520 * Set up IP options in pcb for insertion in output packets.
1521 * Store in mbuf with pointer in pcbopt, adding pseudo-option
1522 * with destination address if source routed.
1523 */
1524 static int
1525 ip_pcbopts(struct inpcb *inp, int optname, struct mbuf *m)
1526 {
1527 register int cnt, optlen;
1528 register u_char *cp;
1529 struct mbuf **pcbopt;
1530 u_char opt;
1531
1532 INP_LOCK_ASSERT(inp);
1533
1534 pcbopt = &inp->inp_options;
1535
1536 /* turn off any old options */
1537 if (*pcbopt)
1538 (void)m_free(*pcbopt);
1539 *pcbopt = 0;
1540 if (m == NULL || m->m_len == 0) {
1541 /*
1542 * Only turning off any previous options.
1543 */
1544 if (m != NULL)
1545 (void)m_free(m);
1546 return (0);
1547 }
1548
1549 if (m->m_len % sizeof(int32_t))
1550 goto bad;
1551 /*
1552 * IP first-hop destination address will be stored before
1553 * actual options; move other options back
1554 * and clear it when none present.
1555 */
1556 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
1557 goto bad;
1558 cnt = m->m_len;
1559 m->m_len += sizeof(struct in_addr);
1560 cp = mtod(m, u_char *) + sizeof(struct in_addr);
1561 bcopy(mtod(m, void *), cp, (unsigned)cnt);
1562 bzero(mtod(m, void *), sizeof(struct in_addr));
1563
1564 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1565 opt = cp[IPOPT_OPTVAL];
1566 if (opt == IPOPT_EOL)
1567 break;
1568 if (opt == IPOPT_NOP)
1569 optlen = 1;
1570 else {
1571 if (cnt < IPOPT_OLEN + sizeof(*cp))
1572 goto bad;
1573 optlen = cp[IPOPT_OLEN];
1574 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
1575 goto bad;
1576 }
1577 switch (opt) {
1578
1579 default:
1580 break;
1581
1582 case IPOPT_LSRR:
1583 case IPOPT_SSRR:
1584 /*
1585 * user process specifies route as:
1586 * ->A->B->C->D
1587 * D must be our final destination (but we can't
1588 * check that since we may not have connected yet).
1589 * A is first hop destination, which doesn't appear in
1590 * actual IP option, but is stored before the options.
1591 */
1592 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
1593 goto bad;
1594 m->m_len -= sizeof(struct in_addr);
1595 cnt -= sizeof(struct in_addr);
1596 optlen -= sizeof(struct in_addr);
1597 cp[IPOPT_OLEN] = optlen;
1598 /*
1599 * Move first hop before start of options.
1600 */
1601 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
1602 sizeof(struct in_addr));
1603 /*
1604 * Then copy rest of options back
1605 * to close up the deleted entry.
1606 */
1607 bcopy((&cp[IPOPT_OFFSET+1] + sizeof(struct in_addr)),
1608 &cp[IPOPT_OFFSET+1],
1609 (unsigned)cnt - (IPOPT_MINOFF - 1));
1610 break;
1611 }
1612 }
1613 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
1614 goto bad;
1615 *pcbopt = m;
1616 return (0);
1617
1618 bad:
1619 (void)m_free(m);
1620 return (EINVAL);
1621 }
1622
1623 /*
1624 * XXX
1625 * The whole multicast option thing needs to be re-thought.
1626 * Several of these options are equally applicable to non-multicast
1627 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
1628 * standard option (IP_TTL).
1629 */
1630
1631 /*
1632 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
1633 */
1634 static struct ifnet *
1635 ip_multicast_if(a, ifindexp)
1636 struct in_addr *a;
1637 int *ifindexp;
1638 {
1639 int ifindex;
1640 struct ifnet *ifp;
1641
1642 if (ifindexp)
1643 *ifindexp = 0;
1644 if (ntohl(a->s_addr) >> 24 == 0) {
1645 ifindex = ntohl(a->s_addr) & 0xffffff;
1646 if (ifindex < 0 || if_index < ifindex)
1647 return NULL;
1648 ifp = ifnet_byindex(ifindex);
1649 if (ifindexp)
1650 *ifindexp = ifindex;
1651 } else {
1652 INADDR_TO_IFP(*a, ifp);
1653 }
1654 return ifp;
1655 }
1656
1657 /*
1658 * Given an inpcb, return its multicast options structure pointer. Accepts
1659 * an unlocked inpcb pointer, but will return it locked. May sleep.
1660 */
1661 static struct ip_moptions *
1662 ip_findmoptions(struct inpcb *inp)
1663 {
1664 struct ip_moptions *imo;
1665
1666 INP_LOCK(inp);
1667 if (inp->inp_moptions != NULL)
1668 return (inp->inp_moptions);
1669
1670 INP_UNLOCK(inp);
1671
1672 imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK);
1673
1674 imo->imo_multicast_ifp = NULL;
1675 imo->imo_multicast_addr.s_addr = INADDR_ANY;
1676 imo->imo_multicast_vif = -1;
1677 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1678 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1679 imo->imo_num_memberships = 0;
1680
1681 INP_LOCK(inp);
1682 if (inp->inp_moptions != NULL) {
1683 free(imo, M_IPMOPTS);
1684 return (inp->inp_moptions);
1685 }
1686 inp->inp_moptions = imo;
1687 return (imo);
1688 }
1689
1690 /*
1691 * Set the IP multicast options in response to user setsockopt().
1692 */
1693 static int
1694 ip_setmoptions(struct inpcb *inp, struct sockopt *sopt)
1695 {
1696 int error = 0;
1697 int i;
1698 struct in_addr addr;
1699 struct ip_mreq mreq;
1700 struct ifnet *ifp;
1701 struct ip_moptions *imo;
1702 struct route ro;
1703 struct sockaddr_in *dst;
1704 int ifindex;
1705 int s;
1706
1707 switch (sopt->sopt_name) {
1708 /* store an index number for the vif you wanna use in the send */
1709 case IP_MULTICAST_VIF:
1710 if (legal_vif_num == 0) {
1711 error = EOPNOTSUPP;
1712 break;
1713 }
1714 error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
1715 if (error)
1716 break;
1717 if (!legal_vif_num(i) && (i != -1)) {
1718 error = EINVAL;
1719 break;
1720 }
1721 imo = ip_findmoptions(inp);
1722 imo->imo_multicast_vif = i;
1723 INP_UNLOCK(inp);
1724 break;
1725
1726 case IP_MULTICAST_IF:
1727 /*
1728 * Select the interface for outgoing multicast packets.
1729 */
1730 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
1731 if (error)
1732 break;
1733 /*
1734 * INADDR_ANY is used to remove a previous selection.
1735 * When no interface is selected, a default one is
1736 * chosen every time a multicast packet is sent.
1737 */
1738 imo = ip_findmoptions(inp);
1739 if (addr.s_addr == INADDR_ANY) {
1740 imo->imo_multicast_ifp = NULL;
1741 INP_UNLOCK(inp);
1742 break;
1743 }
1744 /*
1745 * The selected interface is identified by its local
1746 * IP address. Find the interface and confirm that
1747 * it supports multicasting.
1748 */
1749 s = splimp();
1750 ifp = ip_multicast_if(&addr, &ifindex);
1751 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1752 INP_UNLOCK(inp);
1753 splx(s);
1754 error = EADDRNOTAVAIL;
1755 break;
1756 }
1757 imo->imo_multicast_ifp = ifp;
1758 if (ifindex)
1759 imo->imo_multicast_addr = addr;
1760 else
1761 imo->imo_multicast_addr.s_addr = INADDR_ANY;
1762 INP_UNLOCK(inp);
1763 splx(s);
1764 break;
1765
1766 case IP_MULTICAST_TTL:
1767 /*
1768 * Set the IP time-to-live for outgoing multicast packets.
1769 * The original multicast API required a char argument,
1770 * which is inconsistent with the rest of the socket API.
1771 * We allow either a char or an int.
1772 */
1773 if (sopt->sopt_valsize == 1) {
1774 u_char ttl;
1775 error = sooptcopyin(sopt, &ttl, 1, 1);
1776 if (error)
1777 break;
1778 imo = ip_findmoptions(inp);
1779 imo->imo_multicast_ttl = ttl;
1780 INP_UNLOCK(inp);
1781 } else {
1782 u_int ttl;
1783 error = sooptcopyin(sopt, &ttl, sizeof ttl,
1784 sizeof ttl);
1785 if (error)
1786 break;
1787 if (ttl > 255)
1788 error = EINVAL;
1789 else {
1790 imo = ip_findmoptions(inp);
1791 imo->imo_multicast_ttl = ttl;
1792 INP_UNLOCK(inp);
1793 }
1794 }
1795 break;
1796
1797 case IP_MULTICAST_LOOP:
1798 /*
1799 * Set the loopback flag for outgoing multicast packets.
1800 * Must be zero or one. The original multicast API required a
1801 * char argument, which is inconsistent with the rest
1802 * of the socket API. We allow either a char or an int.
1803 */
1804 if (sopt->sopt_valsize == 1) {
1805 u_char loop;
1806 error = sooptcopyin(sopt, &loop, 1, 1);
1807 if (error)
1808 break;
1809 imo = ip_findmoptions(inp);
1810 imo->imo_multicast_loop = !!loop;
1811 INP_UNLOCK(inp);
1812 } else {
1813 u_int loop;
1814 error = sooptcopyin(sopt, &loop, sizeof loop,
1815 sizeof loop);
1816 if (error)
1817 break;
1818 imo = ip_findmoptions(inp);
1819 imo->imo_multicast_loop = !!loop;
1820 INP_UNLOCK(inp);
1821 }
1822 break;
1823
1824 case IP_ADD_MEMBERSHIP:
1825 /*
1826 * Add a multicast group membership.
1827 * Group must be a valid IP multicast address.
1828 */
1829 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1830 if (error)
1831 break;
1832
1833 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1834 error = EINVAL;
1835 break;
1836 }
1837 s = splimp();
1838 /*
1839 * If no interface address was provided, use the interface of
1840 * the route to the given multicast address.
1841 */
1842 if (mreq.imr_interface.s_addr == INADDR_ANY) {
1843 bzero((caddr_t)&ro, sizeof(ro));
1844 dst = (struct sockaddr_in *)&ro.ro_dst;
1845 dst->sin_len = sizeof(*dst);
1846 dst->sin_family = AF_INET;
1847 dst->sin_addr = mreq.imr_multiaddr;
1848 rtalloc_ign(&ro, RTF_CLONING);
1849 if (ro.ro_rt == NULL) {
1850 error = EADDRNOTAVAIL;
1851 splx(s);
1852 break;
1853 }
1854 ifp = ro.ro_rt->rt_ifp;
1855 RTFREE(ro.ro_rt);
1856 }
1857 else {
1858 ifp = ip_multicast_if(&mreq.imr_interface, NULL);
1859 }
1860
1861 /*
1862 * See if we found an interface, and confirm that it
1863 * supports multicast.
1864 */
1865 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1866 error = EADDRNOTAVAIL;
1867 splx(s);
1868 break;
1869 }
1870 /*
1871 * See if the membership already exists or if all the
1872 * membership slots are full.
1873 */
1874 imo = ip_findmoptions(inp);
1875 for (i = 0; i < imo->imo_num_memberships; ++i) {
1876 if (imo->imo_membership[i]->inm_ifp == ifp &&
1877 imo->imo_membership[i]->inm_addr.s_addr
1878 == mreq.imr_multiaddr.s_addr)
1879 break;
1880 }
1881 if (i < imo->imo_num_memberships) {
1882 INP_UNLOCK(inp);
1883 error = EADDRINUSE;
1884 splx(s);
1885 break;
1886 }
1887 if (i == IP_MAX_MEMBERSHIPS) {
1888 INP_UNLOCK(inp);
1889 error = ETOOMANYREFS;
1890 splx(s);
1891 break;
1892 }
1893 /*
1894 * Everything looks good; add a new record to the multicast
1895 * address list for the given interface.
1896 */
1897 if ((imo->imo_membership[i] =
1898 in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) {
1899 INP_UNLOCK(inp);
1900 error = ENOBUFS;
1901 splx(s);
1902 break;
1903 }
1904 ++imo->imo_num_memberships;
1905 INP_UNLOCK(inp);
1906 splx(s);
1907 break;
1908
1909 case IP_DROP_MEMBERSHIP:
1910 /*
1911 * Drop a multicast group membership.
1912 * Group must be a valid IP multicast address.
1913 */
1914 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1915 if (error)
1916 break;
1917
1918 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1919 error = EINVAL;
1920 break;
1921 }
1922
1923 s = splimp();
1924 /*
1925 * If an interface address was specified, get a pointer
1926 * to its ifnet structure.
1927 */
1928 if (mreq.imr_interface.s_addr == INADDR_ANY)
1929 ifp = NULL;
1930 else {
1931 ifp = ip_multicast_if(&mreq.imr_interface, NULL);
1932 if (ifp == NULL) {
1933 error = EADDRNOTAVAIL;
1934 splx(s);
1935 break;
1936 }
1937 }
1938 /*
1939 * Find the membership in the membership array.
1940 */
1941 imo = ip_findmoptions(inp);
1942 for (i = 0; i < imo->imo_num_memberships; ++i) {
1943 if ((ifp == NULL ||
1944 imo->imo_membership[i]->inm_ifp == ifp) &&
1945 imo->imo_membership[i]->inm_addr.s_addr ==
1946 mreq.imr_multiaddr.s_addr)
1947 break;
1948 }
1949 if (i == imo->imo_num_memberships) {
1950 INP_UNLOCK(inp);
1951 error = EADDRNOTAVAIL;
1952 splx(s);
1953 break;
1954 }
1955 /*
1956 * Give up the multicast address record to which the
1957 * membership points.
1958 */
1959 in_delmulti(imo->imo_membership[i]);
1960 /*
1961 * Remove the gap in the membership array.
1962 */
1963 for (++i; i < imo->imo_num_memberships; ++i)
1964 imo->imo_membership[i-1] = imo->imo_membership[i];
1965 --imo->imo_num_memberships;
1966 INP_UNLOCK(inp);
1967 splx(s);
1968 break;
1969
1970 default:
1971 error = EOPNOTSUPP;
1972 break;
1973 }
1974
1975 return (error);
1976 }
1977
1978 /*
1979 * Return the IP multicast options in response to user getsockopt().
1980 */
1981 static int
1982 ip_getmoptions(struct inpcb *inp, struct sockopt *sopt)
1983 {
1984 struct ip_moptions *imo;
1985 struct in_addr addr;
1986 struct in_ifaddr *ia;
1987 int error, optval;
1988 u_char coptval;
1989
1990 INP_LOCK(inp);
1991 imo = inp->inp_moptions;
1992
1993 error = 0;
1994 switch (sopt->sopt_name) {
1995 case IP_MULTICAST_VIF:
1996 if (imo != NULL)
1997 optval = imo->imo_multicast_vif;
1998 else
1999 optval = -1;
2000 INP_UNLOCK(inp);
2001 error = sooptcopyout(sopt, &optval, sizeof optval);
2002 break;
2003
2004 case IP_MULTICAST_IF:
2005 if (imo == NULL || imo->imo_multicast_ifp == NULL)
2006 addr.s_addr = INADDR_ANY;
2007 else if (imo->imo_multicast_addr.s_addr) {
2008 /* return the value user has set */
2009 addr = imo->imo_multicast_addr;
2010 } else {
2011 IFP_TO_IA(imo->imo_multicast_ifp, ia);
2012 addr.s_addr = (ia == NULL) ? INADDR_ANY
2013 : IA_SIN(ia)->sin_addr.s_addr;
2014 }
2015 INP_UNLOCK(inp);
2016 error = sooptcopyout(sopt, &addr, sizeof addr);
2017 break;
2018
2019 case IP_MULTICAST_TTL:
2020 if (imo == 0)
2021 optval = coptval = IP_DEFAULT_MULTICAST_TTL;
2022 else
2023 optval = coptval = imo->imo_multicast_ttl;
2024 INP_UNLOCK(inp);
2025 if (sopt->sopt_valsize == 1)
2026 error = sooptcopyout(sopt, &coptval, 1);
2027 else
2028 error = sooptcopyout(sopt, &optval, sizeof optval);
2029 break;
2030
2031 case IP_MULTICAST_LOOP:
2032 if (imo == 0)
2033 optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
2034 else
2035 optval = coptval = imo->imo_multicast_loop;
2036 INP_UNLOCK(inp);
2037 if (sopt->sopt_valsize == 1)
2038 error = sooptcopyout(sopt, &coptval, 1);
2039 else
2040 error = sooptcopyout(sopt, &optval, sizeof optval);
2041 break;
2042
2043 default:
2044 INP_UNLOCK(inp);
2045 error = ENOPROTOOPT;
2046 break;
2047 }
2048 INP_UNLOCK_ASSERT(inp);
2049
2050 return (error);
2051 }
2052
2053 /*
2054 * Discard the IP multicast options.
2055 */
2056 void
2057 ip_freemoptions(imo)
2058 register struct ip_moptions *imo;
2059 {
2060 register int i;
2061
2062 if (imo != NULL) {
2063 for (i = 0; i < imo->imo_num_memberships; ++i)
2064 in_delmulti(imo->imo_membership[i]);
2065 free(imo, M_IPMOPTS);
2066 }
2067 }
2068
2069 /*
2070 * Routine called from ip_output() to loop back a copy of an IP multicast
2071 * packet to the input queue of a specified interface. Note that this
2072 * calls the output routine of the loopback "driver", but with an interface
2073 * pointer that might NOT be a loopback interface -- evil, but easier than
2074 * replicating that code here.
2075 */
2076 static void
2077 ip_mloopback(ifp, m, dst, hlen)
2078 struct ifnet *ifp;
2079 register struct mbuf *m;
2080 register struct sockaddr_in *dst;
2081 int hlen;
2082 {
2083 register struct ip *ip;
2084 struct mbuf *copym;
2085
2086 copym = m_copy(m, 0, M_COPYALL);
2087 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
2088 copym = m_pullup(copym, hlen);
2089 if (copym != NULL) {
2090 /* If needed, compute the checksum and mark it as valid. */
2091 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
2092 in_delayed_cksum(copym);
2093 copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
2094 copym->m_pkthdr.csum_flags |=
2095 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
2096 copym->m_pkthdr.csum_data = 0xffff;
2097 }
2098 /*
2099 * We don't bother to fragment if the IP length is greater
2100 * than the interface's MTU. Can this possibly matter?
2101 */
2102 ip = mtod(copym, struct ip *);
2103 ip->ip_len = htons(ip->ip_len);
2104 ip->ip_off = htons(ip->ip_off);
2105 ip->ip_sum = 0;
2106 ip->ip_sum = in_cksum(copym, hlen);
2107 /*
2108 * NB:
2109 * It's not clear whether there are any lingering
2110 * reentrancy problems in other areas which might
2111 * be exposed by using ip_input directly (in
2112 * particular, everything which modifies the packet
2113 * in-place). Yet another option is using the
2114 * protosw directly to deliver the looped back
2115 * packet. For the moment, we'll err on the side
2116 * of safety by using if_simloop().
2117 */
2118 #if 1 /* XXX */
2119 if (dst->sin_family != AF_INET) {
2120 printf("ip_mloopback: bad address family %d\n",
2121 dst->sin_family);
2122 dst->sin_family = AF_INET;
2123 }
2124 #endif
2125
2126 #ifdef notdef
2127 copym->m_pkthdr.rcvif = ifp;
2128 ip_input(copym);
2129 #else
2130 if_simloop(ifp, copym, dst->sin_family, 0);
2131 #endif
2132 }
2133 }
Cache object: e8b45ed1f2ff7f2edddab8c6e3cbbc53
|