1 /*-
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
30 * $FreeBSD: releng/6.2/sys/netinet/ip_output.c 164731 2006-11-28 23:19:18Z rwatson $
31 */
32
33 #include "opt_ipfw.h"
34 #include "opt_ipsec.h"
35 #include "opt_mac.h"
36 #include "opt_mbuf_stress_test.h"
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/kernel.h>
41 #include <sys/mac.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/protosw.h>
45 #include <sys/socket.h>
46 #include <sys/socketvar.h>
47 #include <sys/sysctl.h>
48
49 #include <net/if.h>
50 #include <net/netisr.h>
51 #include <net/pfil.h>
52 #include <net/route.h>
53
54 #include <netinet/in.h>
55 #include <netinet/in_systm.h>
56 #include <netinet/ip.h>
57 #include <netinet/in_pcb.h>
58 #include <netinet/in_var.h>
59 #include <netinet/ip_var.h>
60
61 #include <machine/in_cksum.h>
62
/*
 * File-private malloc type M_IPMOPTS.  Its short name is "ip_moptions" and
 * its description string is "internet multicast options".
 * NOTE(review): the allocations tagged with this type are not visible in
 * this part of the file -- presumably struct ip_moptions; confirm.
 */
static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options");
64
65 #ifdef IPSEC
66 #include <netinet6/ipsec.h>
67 #include <netkey/key.h>
68 #ifdef IPSEC_DEBUG
69 #include <netkey/key_debug.h>
70 #else
71 #define KEYDEBUG(lev,arg)
72 #endif
73 #endif /*IPSEC*/
74
75 #ifdef FAST_IPSEC
76 #include <netipsec/ipsec.h>
77 #include <netipsec/xform.h>
78 #include <netipsec/key.h>
79 #endif /*FAST_IPSEC*/
80
/*
 * Debug helper: print the prefix string x, the IPv4 address held in the
 * struct in_addr expression a (network byte order) as a dotted quad, and
 * then the suffix string y.
 *
 * Every macro argument is parenthesized so that arbitrary expressions
 * (e.g. "*addrp" or "cond ? a : b") expand correctly.  The trailing
 * semicolon is part of the historical expansion and is kept so that
 * existing call sites keep compiling unchanged.
 */
#define print_ip(x, a, y)	 printf("%s %d.%d.%d.%d%s",\
				(x), (ntohl((a).s_addr)>>24)&0xFF,\
				(ntohl((a).s_addr)>>16)&0xFF,\
				(ntohl((a).s_addr)>>8)&0xFF,\
				(ntohl((a).s_addr))&0xFF, (y));
86
/*
 * NOTE(review): presumably the global IP identification counter.  It is not
 * referenced in this part of the file (ip_output() obtains IDs through
 * ip_newid()) -- confirm against its users.
 */
u_short ip_id;

#ifdef MBUF_STRESS_TEST
/*
 * Stress-test knob, exported read/write as the sysctl
 * net.inet.ip.mbuf_frag_size.  When non-zero, ip_output() re-fragments
 * outgoing mbuf chains longer than this many bytes with m_fragment()
 * before handing them to the interface.  0 disables the behaviour.
 */
int mbuf_frag_size = 0;
SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW,
	&mbuf_frag_size, 0, "Fragment outgoing mbufs to this size");
#endif
94
/* Forward declarations of the helpers that are private to this file. */
static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
static struct ifnet *ip_multicast_if(struct in_addr *, int *);
static void	ip_mloopback
	(struct ifnet *, struct mbuf *, struct sockaddr_in *, int);
static int	ip_getmoptions(struct inpcb *, struct sockopt *);
static int	ip_pcbopts(struct inpcb *, int, struct mbuf *);
static int	ip_setmoptions(struct inpcb *, struct sockopt *);
static struct ip_moptions	*ip_findmoptions(struct inpcb *inp);

/* Not static: ip_optcopy() has external linkage. */
int	ip_optcopy(struct ip *, struct ip *);


/* Protocol switch table, defined outside this file. */
extern	struct protosw inetsw[];
108
109 /*
110 * IP output. The packet in mbuf chain m contains a skeletal IP
111 * header (with len, off, ttl, proto, tos, src, dst).
112 * The mbuf chain containing the packet will be freed.
113 * The mbuf opt, if present, will not be freed.
114 * In the IP forwarding case, the packet will arrive with options already
115 * inserted, so must have a NULL opt pointer.
116 */
117 int
118 ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro,
119 int flags, struct ip_moptions *imo, struct inpcb *inp)
120 {
121 struct ip *ip;
122 struct ifnet *ifp = NULL; /* keep compiler happy */
123 struct mbuf *m0;
124 int hlen = sizeof (struct ip);
125 int len, error = 0;
126 struct sockaddr_in *dst = NULL; /* keep compiler happy */
127 struct in_ifaddr *ia = NULL;
128 int isbroadcast, sw_csum;
129 struct route iproute;
130 struct in_addr odst;
131 #ifdef IPFIREWALL_FORWARD
132 struct m_tag *fwd_tag = NULL;
133 #endif
134 #ifdef IPSEC
135 struct secpolicy *sp = NULL;
136 #endif
137 #ifdef FAST_IPSEC
138 struct secpolicy *sp = NULL;
139 struct tdb_ident *tdbi;
140 struct m_tag *mtag;
141 int s;
142 #endif /* FAST_IPSEC */
143
144 M_ASSERTPKTHDR(m);
145
146 if (ro == NULL) {
147 ro = &iproute;
148 bzero(ro, sizeof (*ro));
149 }
150
151 if (inp != NULL)
152 INP_LOCK_ASSERT(inp);
153
154 if (opt) {
155 len = 0;
156 m = ip_insertoptions(m, opt, &len);
157 if (len != 0)
158 hlen = len;
159 }
160 ip = mtod(m, struct ip *);
161
162 /*
163 * Fill in IP header. If we are not allowing fragmentation,
164 * then the ip_id field is meaningless, but we don't set it
165 * to zero. Doing so causes various problems when devices along
166 * the path (routers, load balancers, firewalls, etc.) illegally
167 * disable DF on our packet. Note that a 16-bit counter
168 * will wrap around in less than 10 seconds at 100 Mbit/s on a
169 * medium with MTU 1500. See Steven M. Bellovin, "A Technique
170 * for Counting NATted Hosts", Proc. IMW'02, available at
171 * <http://www.cs.columbia.edu/~smb/papers/fnat.pdf>.
172 */
173 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
174 ip->ip_v = IPVERSION;
175 ip->ip_hl = hlen >> 2;
176 ip->ip_id = ip_newid();
177 ipstat.ips_localout++;
178 } else {
179 hlen = ip->ip_hl << 2;
180 }
181
182 dst = (struct sockaddr_in *)&ro->ro_dst;
183 again:
184 /*
185 * If there is a cached route,
186 * check that it is to the same destination
187 * and is still up. If not, free it and try again.
188 * The address family should also be checked in case of sharing the
189 * cache with IPv6.
190 */
191 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
192 dst->sin_family != AF_INET ||
193 dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
194 RTFREE(ro->ro_rt);
195 ro->ro_rt = (struct rtentry *)0;
196 }
197 #ifdef IPFIREWALL_FORWARD
198 if (ro->ro_rt == NULL && fwd_tag == NULL) {
199 #else
200 if (ro->ro_rt == NULL) {
201 #endif
202 bzero(dst, sizeof(*dst));
203 dst->sin_family = AF_INET;
204 dst->sin_len = sizeof(*dst);
205 dst->sin_addr = ip->ip_dst;
206 }
207 /*
208 * If routing to interface only,
209 * short circuit routing lookup.
210 */
211 if (flags & IP_ROUTETOIF) {
212 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL &&
213 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == NULL) {
214 ipstat.ips_noroute++;
215 error = ENETUNREACH;
216 goto bad;
217 }
218 ifp = ia->ia_ifp;
219 ip->ip_ttl = 1;
220 isbroadcast = in_broadcast(dst->sin_addr, ifp);
221 } else if (flags & IP_SENDONES) {
222 if ((ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst)))) == NULL) {
223 ipstat.ips_noroute++;
224 error = ENETUNREACH;
225 goto bad;
226 }
227 ifp = ia->ia_ifp;
228 ip->ip_dst.s_addr = INADDR_BROADCAST;
229 dst->sin_addr = ip->ip_dst;
230 ip->ip_ttl = 1;
231 isbroadcast = 1;
232 } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
233 imo != NULL && imo->imo_multicast_ifp != NULL) {
234 /*
235 * Bypass the normal routing lookup for multicast
236 * packets if the interface is specified.
237 */
238 ifp = imo->imo_multicast_ifp;
239 IFP_TO_IA(ifp, ia);
240 isbroadcast = 0; /* fool gcc */
241 } else {
242 /*
243 * We want to do any cloning requested by the link layer,
244 * as this is probably required in all cases for correct
245 * operation (as it is for ARP).
246 */
247 if (ro->ro_rt == NULL)
248 rtalloc_ign(ro, 0);
249 if (ro->ro_rt == NULL) {
250 ipstat.ips_noroute++;
251 error = EHOSTUNREACH;
252 goto bad;
253 }
254 ia = ifatoia(ro->ro_rt->rt_ifa);
255 ifp = ro->ro_rt->rt_ifp;
256 ro->ro_rt->rt_rmx.rmx_pksent++;
257 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
258 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
259 if (ro->ro_rt->rt_flags & RTF_HOST)
260 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
261 else
262 isbroadcast = in_broadcast(dst->sin_addr, ifp);
263 }
264 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
265 struct in_multi *inm;
266
267 m->m_flags |= M_MCAST;
268 /*
269 * IP destination address is multicast. Make sure "dst"
270 * still points to the address in "ro". (It may have been
271 * changed to point to a gateway address, above.)
272 */
273 dst = (struct sockaddr_in *)&ro->ro_dst;
274 /*
275 * See if the caller provided any multicast options
276 */
277 if (imo != NULL) {
278 ip->ip_ttl = imo->imo_multicast_ttl;
279 if (imo->imo_multicast_vif != -1)
280 ip->ip_src.s_addr =
281 ip_mcast_src ?
282 ip_mcast_src(imo->imo_multicast_vif) :
283 INADDR_ANY;
284 } else
285 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
286 /*
287 * Confirm that the outgoing interface supports multicast.
288 */
289 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
290 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
291 ipstat.ips_noroute++;
292 error = ENETUNREACH;
293 goto bad;
294 }
295 }
296 /*
297 * If source address not specified yet, use address
298 * of outgoing interface.
299 */
300 if (ip->ip_src.s_addr == INADDR_ANY) {
301 /* Interface may have no addresses. */
302 if (ia != NULL)
303 ip->ip_src = IA_SIN(ia)->sin_addr;
304 }
305
306 IN_MULTI_LOCK();
307 IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
308 if (inm != NULL &&
309 (imo == NULL || imo->imo_multicast_loop)) {
310 IN_MULTI_UNLOCK();
311 /*
312 * If we belong to the destination multicast group
313 * on the outgoing interface, and the caller did not
314 * forbid loopback, loop back a copy.
315 */
316 ip_mloopback(ifp, m, dst, hlen);
317 }
318 else {
319 IN_MULTI_UNLOCK();
320 /*
321 * If we are acting as a multicast router, perform
322 * multicast forwarding as if the packet had just
323 * arrived on the interface to which we are about
324 * to send. The multicast forwarding function
325 * recursively calls this function, using the
326 * IP_FORWARDING flag to prevent infinite recursion.
327 *
328 * Multicasts that are looped back by ip_mloopback(),
329 * above, will be forwarded by the ip_input() routine,
330 * if necessary.
331 */
332 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
333 /*
334 * If rsvp daemon is not running, do not
335 * set ip_moptions. This ensures that the packet
336 * is multicast and not just sent down one link
337 * as prescribed by rsvpd.
338 */
339 if (!rsvp_on)
340 imo = NULL;
341 if (ip_mforward &&
342 ip_mforward(ip, ifp, m, imo) != 0) {
343 m_freem(m);
344 goto done;
345 }
346 }
347 }
348
349 /*
350 * Multicasts with a time-to-live of zero may be looped-
351 * back, above, but must not be transmitted on a network.
352 * Also, multicasts addressed to the loopback interface
353 * are not sent -- the above call to ip_mloopback() will
354 * loop back a copy if this host actually belongs to the
355 * destination group on the loopback interface.
356 */
357 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
358 m_freem(m);
359 goto done;
360 }
361
362 goto sendit;
363 }
364 #ifndef notdef
365 /*
366 * If the source address is not specified yet, use the address
367 * of the outoing interface.
368 */
369 if (ip->ip_src.s_addr == INADDR_ANY) {
370 /* Interface may have no addresses. */
371 if (ia != NULL) {
372 ip->ip_src = IA_SIN(ia)->sin_addr;
373 }
374 }
375 #endif /* notdef */
376 /*
377 * Verify that we have any chance at all of being able to queue the
378 * packet or packet fragments, unless ALTQ is enabled on the given
379 * interface in which case packetdrop should be done by queueing.
380 */
381 #ifdef ALTQ
382 if ((!ALTQ_IS_ENABLED(&ifp->if_snd)) &&
383 ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
384 ifp->if_snd.ifq_maxlen))
385 #else
386 if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
387 ifp->if_snd.ifq_maxlen)
388 #endif /* ALTQ */
389 {
390 error = ENOBUFS;
391 ipstat.ips_odropped++;
392 ifp->if_snd.ifq_drops += (ip->ip_len / ifp->if_mtu + 1);
393 goto bad;
394 }
395
396 /*
397 * Look for broadcast address and
398 * verify user is allowed to send
399 * such a packet.
400 */
401 if (isbroadcast) {
402 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
403 error = EADDRNOTAVAIL;
404 goto bad;
405 }
406 if ((flags & IP_ALLOWBROADCAST) == 0) {
407 error = EACCES;
408 goto bad;
409 }
410 /* don't allow broadcast messages to be fragmented */
411 if (ip->ip_len > ifp->if_mtu) {
412 error = EMSGSIZE;
413 goto bad;
414 }
415 m->m_flags |= M_BCAST;
416 } else {
417 m->m_flags &= ~M_BCAST;
418 }
419
420 sendit:
421 #ifdef IPSEC
422 /* get SP for this packet */
423 if (inp == NULL)
424 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND,
425 flags, &error);
426 else
427 sp = ipsec4_getpolicybypcb(m, IPSEC_DIR_OUTBOUND, inp, &error);
428
429 if (sp == NULL) {
430 ipsecstat.out_inval++;
431 goto bad;
432 }
433
434 error = 0;
435
436 /* check policy */
437 switch (sp->policy) {
438 case IPSEC_POLICY_DISCARD:
439 /*
440 * This packet is just discarded.
441 */
442 ipsecstat.out_polvio++;
443 goto bad;
444
445 case IPSEC_POLICY_BYPASS:
446 case IPSEC_POLICY_NONE:
447 case IPSEC_POLICY_TCP:
448 /* no need to do IPsec. */
449 goto skip_ipsec;
450
451 case IPSEC_POLICY_IPSEC:
452 if (sp->req == NULL) {
453 /* acquire a policy */
454 error = key_spdacquire(sp);
455 goto bad;
456 }
457 break;
458
459 case IPSEC_POLICY_ENTRUST:
460 default:
461 printf("ip_output: Invalid policy found. %d\n", sp->policy);
462 }
463 {
464 struct ipsec_output_state state;
465 bzero(&state, sizeof(state));
466 state.m = m;
467 if (flags & IP_ROUTETOIF) {
468 state.ro = &iproute;
469 bzero(&iproute, sizeof(iproute));
470 } else
471 state.ro = ro;
472 state.dst = (struct sockaddr *)dst;
473
474 ip->ip_sum = 0;
475
476 /*
477 * XXX
478 * delayed checksums are not currently compatible with IPsec
479 */
480 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
481 in_delayed_cksum(m);
482 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
483 }
484
485 ip->ip_len = htons(ip->ip_len);
486 ip->ip_off = htons(ip->ip_off);
487
488 error = ipsec4_output(&state, sp, flags);
489
490 m = state.m;
491 if (flags & IP_ROUTETOIF) {
492 /*
493 * if we have tunnel mode SA, we may need to ignore
494 * IP_ROUTETOIF.
495 */
496 if (state.ro != &iproute || state.ro->ro_rt != NULL) {
497 flags &= ~IP_ROUTETOIF;
498 ro = state.ro;
499 }
500 } else
501 ro = state.ro;
502 dst = (struct sockaddr_in *)state.dst;
503 if (error) {
504 /* mbuf is already reclaimed in ipsec4_output. */
505 m = NULL;
506 switch (error) {
507 case EHOSTUNREACH:
508 case ENETUNREACH:
509 case EMSGSIZE:
510 case ENOBUFS:
511 case ENOMEM:
512 break;
513 default:
514 printf("ip4_output (ipsec): error code %d\n", error);
515 /*fall through*/
516 case ENOENT:
517 /* don't show these error codes to the user */
518 error = 0;
519 break;
520 }
521 goto bad;
522 }
523
524 /* be sure to update variables that are affected by ipsec4_output() */
525 ip = mtod(m, struct ip *);
526 hlen = ip->ip_hl << 2;
527 if (ro->ro_rt == NULL) {
528 if ((flags & IP_ROUTETOIF) == 0) {
529 printf("ip_output: "
530 "can't update route after IPsec processing\n");
531 error = EHOSTUNREACH; /*XXX*/
532 goto bad;
533 }
534 } else {
535 if (state.encap) {
536 ia = ifatoia(ro->ro_rt->rt_ifa);
537 ifp = ro->ro_rt->rt_ifp;
538 }
539 }
540 }
541
542 /* make it flipped, again. */
543 ip->ip_len = ntohs(ip->ip_len);
544 ip->ip_off = ntohs(ip->ip_off);
545 skip_ipsec:
546 #endif /*IPSEC*/
547 #ifdef FAST_IPSEC
548 /*
549 * Check the security policy (SP) for the packet and, if
550 * required, do IPsec-related processing. There are two
551 * cases here; the first time a packet is sent through
552 * it will be untagged and handled by ipsec4_checkpolicy.
553 * If the packet is resubmitted to ip_output (e.g. after
554 * AH, ESP, etc. processing), there will be a tag to bypass
555 * the lookup and related policy checking.
556 */
557 mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL);
558 s = splnet();
559 if (mtag != NULL) {
560 tdbi = (struct tdb_ident *)(mtag + 1);
561 sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND);
562 if (sp == NULL)
563 error = -EINVAL; /* force silent drop */
564 m_tag_delete(m, mtag);
565 } else {
566 sp = ipsec4_checkpolicy(m, IPSEC_DIR_OUTBOUND, flags,
567 &error, inp);
568 }
569 /*
570 * There are four return cases:
571 * sp != NULL apply IPsec policy
572 * sp == NULL, error == 0 no IPsec handling needed
573 * sp == NULL, error == -EINVAL discard packet w/o error
574 * sp == NULL, error != 0 discard packet, report error
575 */
576 if (sp != NULL) {
577 /* Loop detection, check if ipsec processing already done */
578 KASSERT(sp->req != NULL, ("ip_output: no ipsec request"));
579 for (mtag = m_tag_first(m); mtag != NULL;
580 mtag = m_tag_next(m, mtag)) {
581 if (mtag->m_tag_cookie != MTAG_ABI_COMPAT)
582 continue;
583 if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE &&
584 mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED)
585 continue;
586 /*
587 * Check if policy has an SA associated with it.
588 * This can happen when an SP has yet to acquire
589 * an SA; e.g. on first reference. If it occurs,
590 * then we let ipsec4_process_packet do its thing.
591 */
592 if (sp->req->sav == NULL)
593 break;
594 tdbi = (struct tdb_ident *)(mtag + 1);
595 if (tdbi->spi == sp->req->sav->spi &&
596 tdbi->proto == sp->req->sav->sah->saidx.proto &&
597 bcmp(&tdbi->dst, &sp->req->sav->sah->saidx.dst,
598 sizeof (union sockaddr_union)) == 0) {
599 /*
600 * No IPsec processing is needed, free
601 * reference to SP.
602 *
603 * NB: null pointer to avoid free at
604 * done: below.
605 */
606 KEY_FREESP(&sp), sp = NULL;
607 splx(s);
608 goto spd_done;
609 }
610 }
611
612 /*
613 * Do delayed checksums now because we send before
614 * this is done in the normal processing path.
615 */
616 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
617 in_delayed_cksum(m);
618 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
619 }
620
621 ip->ip_len = htons(ip->ip_len);
622 ip->ip_off = htons(ip->ip_off);
623
624 /* NB: callee frees mbuf */
625 error = ipsec4_process_packet(m, sp->req, flags, 0);
626 /*
627 * Preserve KAME behaviour: ENOENT can be returned
628 * when an SA acquire is in progress. Don't propagate
629 * this to user-level; it confuses applications.
630 *
631 * XXX this will go away when the SADB is redone.
632 */
633 if (error == ENOENT)
634 error = 0;
635 splx(s);
636 goto done;
637 } else {
638 splx(s);
639
640 if (error != 0) {
641 /*
642 * Hack: -EINVAL is used to signal that a packet
643 * should be silently discarded. This is typically
644 * because we asked key management for an SA and
645 * it was delayed (e.g. kicked up to IKE).
646 */
647 if (error == -EINVAL)
648 error = 0;
649 goto bad;
650 } else {
651 /* No IPsec processing for this packet. */
652 }
653 #ifdef notyet
654 /*
655 * If deferred crypto processing is needed, check that
656 * the interface supports it.
657 */
658 mtag = m_tag_find(m, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL);
659 if (mtag != NULL && (ifp->if_capenable & IFCAP_IPSEC) == 0) {
660 /* notify IPsec to do its own crypto */
661 ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
662 error = EHOSTUNREACH;
663 goto bad;
664 }
665 #endif
666 }
667 spd_done:
668 #endif /* FAST_IPSEC */
669
670 /* Jump over all PFIL processing if hooks are not active. */
671 if (inet_pfil_hook.ph_busy_count == -1)
672 goto passout;
673
674 /* Run through list of hooks for output packets. */
675 odst.s_addr = ip->ip_dst.s_addr;
676 error = pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT, inp);
677 if (error != 0 || m == NULL)
678 goto done;
679
680 ip = mtod(m, struct ip *);
681
682 /* See if destination IP address was changed by packet filter. */
683 if (odst.s_addr != ip->ip_dst.s_addr) {
684 m->m_flags |= M_SKIP_FIREWALL;
685 /* If destination is now ourself drop to ip_input(). */
686 if (in_localip(ip->ip_dst)) {
687 m->m_flags |= M_FASTFWD_OURS;
688 if (m->m_pkthdr.rcvif == NULL)
689 m->m_pkthdr.rcvif = loif;
690 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
691 m->m_pkthdr.csum_flags |=
692 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
693 m->m_pkthdr.csum_data = 0xffff;
694 }
695 m->m_pkthdr.csum_flags |=
696 CSUM_IP_CHECKED | CSUM_IP_VALID;
697
698 error = netisr_queue(NETISR_IP, m);
699 goto done;
700 } else
701 goto again; /* Redo the routing table lookup. */
702 }
703
704 #ifdef IPFIREWALL_FORWARD
705 /* See if local, if yes, send it to netisr with IP_FASTFWD_OURS. */
706 if (m->m_flags & M_FASTFWD_OURS) {
707 if (m->m_pkthdr.rcvif == NULL)
708 m->m_pkthdr.rcvif = loif;
709 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
710 m->m_pkthdr.csum_flags |=
711 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
712 m->m_pkthdr.csum_data = 0xffff;
713 }
714 m->m_pkthdr.csum_flags |=
715 CSUM_IP_CHECKED | CSUM_IP_VALID;
716
717 error = netisr_queue(NETISR_IP, m);
718 goto done;
719 }
720 /* Or forward to some other address? */
721 fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
722 if (fwd_tag) {
723 dst = (struct sockaddr_in *)&ro->ro_dst;
724 bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in));
725 m->m_flags |= M_SKIP_FIREWALL;
726 m_tag_delete(m, fwd_tag);
727 goto again;
728 }
729 #endif /* IPFIREWALL_FORWARD */
730
731 passout:
732 /* 127/8 must not appear on wire - RFC1122. */
733 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
734 (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
735 if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
736 ipstat.ips_badaddr++;
737 error = EADDRNOTAVAIL;
738 goto bad;
739 }
740 }
741
742 m->m_pkthdr.csum_flags |= CSUM_IP;
743 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist;
744 if (sw_csum & CSUM_DELAY_DATA) {
745 in_delayed_cksum(m);
746 sw_csum &= ~CSUM_DELAY_DATA;
747 }
748 m->m_pkthdr.csum_flags &= ifp->if_hwassist;
749
750 /*
751 * If small enough for interface, or the interface will take
752 * care of the fragmentation for us, can just send directly.
753 */
754 if (ip->ip_len <= ifp->if_mtu || (ifp->if_hwassist & CSUM_FRAGMENT &&
755 ((ip->ip_off & IP_DF) == 0))) {
756 ip->ip_len = htons(ip->ip_len);
757 ip->ip_off = htons(ip->ip_off);
758 ip->ip_sum = 0;
759 if (sw_csum & CSUM_DELAY_IP)
760 ip->ip_sum = in_cksum(m, hlen);
761
762 /* Record statistics for this interface address. */
763 if (!(flags & IP_FORWARDING) && ia) {
764 ia->ia_ifa.if_opackets++;
765 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
766 }
767
768 #ifdef IPSEC
769 /* clean ipsec history once it goes out of the node */
770 ipsec_delaux(m);
771 #endif
772
773 #ifdef MBUF_STRESS_TEST
774 if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size)
775 m = m_fragment(m, M_DONTWAIT, mbuf_frag_size);
776 #endif
777 error = (*ifp->if_output)(ifp, m,
778 (struct sockaddr *)dst, ro->ro_rt);
779 goto done;
780 }
781
782 if (ip->ip_off & IP_DF) {
783 error = EMSGSIZE;
784 /*
785 * This case can happen if the user changed the MTU
786 * of an interface after enabling IP on it. Because
787 * most netifs don't keep track of routes pointing to
788 * them, there is no way for one to update all its
789 * routes when the MTU is changed.
790 */
791 if (ro != NULL &&
792 (ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) &&
793 (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
794 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
795 }
796 ipstat.ips_cantfrag++;
797 goto bad;
798 }
799
800 /*
801 * Too large for interface; fragment if possible. If successful,
802 * on return, m will point to a list of packets to be sent.
803 */
804 error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist, sw_csum);
805 if (error)
806 goto bad;
807 for (; m; m = m0) {
808 m0 = m->m_nextpkt;
809 m->m_nextpkt = 0;
810 #ifdef IPSEC
811 /* clean ipsec history once it goes out of the node */
812 ipsec_delaux(m);
813 #endif
814 if (error == 0) {
815 /* Record statistics for this interface address. */
816 if (ia != NULL) {
817 ia->ia_ifa.if_opackets++;
818 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
819 }
820
821 error = (*ifp->if_output)(ifp, m,
822 (struct sockaddr *)dst, ro->ro_rt);
823 } else
824 m_freem(m);
825 }
826
827 if (error == 0)
828 ipstat.ips_fragmented++;
829
830 done:
831 if (ro == &iproute && ro->ro_rt) {
832 RTFREE(ro->ro_rt);
833 }
834 #ifdef IPSEC
835 if (sp != NULL) {
836 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
837 printf("DP ip_output call free SP:%p\n", sp));
838 key_freesp(sp);
839 }
840 #endif
841 #ifdef FAST_IPSEC
842 if (sp != NULL)
843 KEY_FREESP(&sp);
844 #endif
845 return (error);
846 bad:
847 m_freem(m);
848 goto done;
849 }
850
851 /*
852 * Create a chain of fragments which fit the given mtu. m_frag points to the
853 * mbuf to be fragmented; on return it points to the chain with the fragments.
854 * Return 0 if no error. If error, m_frag may contain a partially built
855 * chain of fragments that should be freed by the caller.
856 *
857 * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist)
858 * sw_csum contains the delayed checksums flags (e.g., CSUM_DELAY_IP).
859 */
860 int
861 ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
862 u_long if_hwassist_flags, int sw_csum)
863 {
864 int error = 0;
865 int hlen = ip->ip_hl << 2;
866 int len = (mtu - hlen) & ~7; /* size of payload in each fragment */
867 int off;
868 struct mbuf *m0 = *m_frag; /* the original packet */
869 int firstlen;
870 struct mbuf **mnext;
871 int nfrags;
872
873 if (ip->ip_off & IP_DF) { /* Fragmentation not allowed */
874 ipstat.ips_cantfrag++;
875 return EMSGSIZE;
876 }
877
878 /*
879 * Must be able to put at least 8 bytes per fragment.
880 */
881 if (len < 8)
882 return EMSGSIZE;
883
884 /*
885 * If the interface will not calculate checksums on
886 * fragmented packets, then do it here.
887 */
888 if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
889 (if_hwassist_flags & CSUM_IP_FRAGS) == 0) {
890 in_delayed_cksum(m0);
891 m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
892 }
893
894 if (len > PAGE_SIZE) {
895 /*
896 * Fragment large datagrams such that each segment
897 * contains a multiple of PAGE_SIZE amount of data,
898 * plus headers. This enables a receiver to perform
899 * page-flipping zero-copy optimizations.
900 *
901 * XXX When does this help given that sender and receiver
902 * could have different page sizes, and also mtu could
903 * be less than the receiver's page size ?
904 */
905 int newlen;
906 struct mbuf *m;
907
908 for (m = m0, off = 0; m && (off+m->m_len) <= mtu; m = m->m_next)
909 off += m->m_len;
910
911 /*
912 * firstlen (off - hlen) must be aligned on an
913 * 8-byte boundary
914 */
915 if (off < hlen)
916 goto smart_frag_failure;
917 off = ((off - hlen) & ~7) + hlen;
918 newlen = (~PAGE_MASK) & mtu;
919 if ((newlen + sizeof (struct ip)) > mtu) {
920 /* we failed, go back the default */
921 smart_frag_failure:
922 newlen = len;
923 off = hlen + len;
924 }
925 len = newlen;
926
927 } else {
928 off = hlen + len;
929 }
930
931 firstlen = off - hlen;
932 mnext = &m0->m_nextpkt; /* pointer to next packet */
933
934 /*
935 * Loop through length of segment after first fragment,
936 * make new header and copy data of each part and link onto chain.
937 * Here, m0 is the original packet, m is the fragment being created.
938 * The fragments are linked off the m_nextpkt of the original
939 * packet, which after processing serves as the first fragment.
940 */
941 for (nfrags = 1; off < ip->ip_len; off += len, nfrags++) {
942 struct ip *mhip; /* ip header on the fragment */
943 struct mbuf *m;
944 int mhlen = sizeof (struct ip);
945
946 MGETHDR(m, M_DONTWAIT, MT_HEADER);
947 if (m == NULL) {
948 error = ENOBUFS;
949 ipstat.ips_odropped++;
950 goto done;
951 }
952 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
953 /*
954 * In the first mbuf, leave room for the link header, then
955 * copy the original IP header including options. The payload
956 * goes into an additional mbuf chain returned by m_copy().
957 */
958 m->m_data += max_linkhdr;
959 mhip = mtod(m, struct ip *);
960 *mhip = *ip;
961 if (hlen > sizeof (struct ip)) {
962 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
963 mhip->ip_v = IPVERSION;
964 mhip->ip_hl = mhlen >> 2;
965 }
966 m->m_len = mhlen;
967 /* XXX do we need to add ip->ip_off below ? */
968 mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off;
969 if (off + len >= ip->ip_len) { /* last fragment */
970 len = ip->ip_len - off;
971 m->m_flags |= M_LASTFRAG;
972 } else
973 mhip->ip_off |= IP_MF;
974 mhip->ip_len = htons((u_short)(len + mhlen));
975 m->m_next = m_copy(m0, off, len);
976 if (m->m_next == NULL) { /* copy failed */
977 m_free(m);
978 error = ENOBUFS; /* ??? */
979 ipstat.ips_odropped++;
980 goto done;
981 }
982 m->m_pkthdr.len = mhlen + len;
983 m->m_pkthdr.rcvif = NULL;
984 #ifdef MAC
985 mac_create_fragment(m0, m);
986 #endif
987 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
988 mhip->ip_off = htons(mhip->ip_off);
989 mhip->ip_sum = 0;
990 if (sw_csum & CSUM_DELAY_IP)
991 mhip->ip_sum = in_cksum(m, mhlen);
992 *mnext = m;
993 mnext = &m->m_nextpkt;
994 }
995 ipstat.ips_ofragments += nfrags;
996
997 /* set first marker for fragment chain */
998 m0->m_flags |= M_FIRSTFRAG | M_FRAG;
999 m0->m_pkthdr.csum_data = nfrags;
1000
1001 /*
1002 * Update first fragment by trimming what's been copied out
1003 * and updating header.
1004 */
1005 m_adj(m0, hlen + firstlen - ip->ip_len);
1006 m0->m_pkthdr.len = hlen + firstlen;
1007 ip->ip_len = htons((u_short)m0->m_pkthdr.len);
1008 ip->ip_off |= IP_MF;
1009 ip->ip_off = htons(ip->ip_off);
1010 ip->ip_sum = 0;
1011 if (sw_csum & CSUM_DELAY_IP)
1012 ip->ip_sum = in_cksum(m0, hlen);
1013
1014 done:
1015 *m_frag = m0;
1016 return error;
1017 }
1018
1019 void
1020 in_delayed_cksum(struct mbuf *m)
1021 {
1022 struct ip *ip;
1023 u_short csum, offset;
1024
1025 ip = mtod(m, struct ip *);
1026 offset = ip->ip_hl << 2 ;
1027 csum = in_cksum_skip(m, ip->ip_len, offset);
1028 if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
1029 csum = 0xffff;
1030 offset += m->m_pkthdr.csum_data; /* checksum offset */
1031
1032 if (offset + sizeof(u_short) > m->m_len) {
1033 printf("delayed m_pullup, m->len: %d off: %d p: %d\n",
1034 m->m_len, offset, ip->ip_p);
1035 /*
1036 * XXX
1037 * this shouldn't happen, but if it does, the
1038 * correct behavior may be to insert the checksum
1039 * in the appropriate next mbuf in the chain.
1040 */
1041 return;
1042 }
1043 *(u_short *)(m->m_data + offset) = csum;
1044 }
1045
1046 /*
1047 * Insert IP options into preformed packet.
1048 * Adjust IP destination as required for IP source routing,
1049 * as indicated by a non-zero in_addr at the start of the options.
1050 *
1051 * XXX This routine assumes that the packet has no options in place.
1052 */
1053 static struct mbuf *
1054 ip_insertoptions(m, opt, phlen)
1055 register struct mbuf *m;
1056 struct mbuf *opt;
1057 int *phlen;
1058 {
1059 register struct ipoption *p = mtod(opt, struct ipoption *);
1060 struct mbuf *n;
1061 register struct ip *ip = mtod(m, struct ip *);
1062 unsigned optlen;
1063
1064 optlen = opt->m_len - sizeof(p->ipopt_dst);
1065 if (optlen + ip->ip_len > IP_MAXPACKET) {
1066 *phlen = 0;
1067 return (m); /* XXX should fail */
1068 }
1069 if (p->ipopt_dst.s_addr)
1070 ip->ip_dst = p->ipopt_dst;
1071 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
1072 MGETHDR(n, M_DONTWAIT, MT_HEADER);
1073 if (n == NULL) {
1074 *phlen = 0;
1075 return (m);
1076 }
1077 M_MOVE_PKTHDR(n, m);
1078 n->m_pkthdr.rcvif = NULL;
1079 #ifdef MAC
1080 mac_copy_mbuf(m, n);
1081 #endif
1082 n->m_pkthdr.len += optlen;
1083 m->m_len -= sizeof(struct ip);
1084 m->m_data += sizeof(struct ip);
1085 n->m_next = m;
1086 m = n;
1087 m->m_len = optlen + sizeof(struct ip);
1088 m->m_data += max_linkhdr;
1089 bcopy(ip, mtod(m, void *), sizeof(struct ip));
1090 } else {
1091 m->m_data -= optlen;
1092 m->m_len += optlen;
1093 m->m_pkthdr.len += optlen;
1094 bcopy(ip, mtod(m, void *), sizeof(struct ip));
1095 }
1096 ip = mtod(m, struct ip *);
1097 bcopy(p->ipopt_list, ip + 1, optlen);
1098 *phlen = sizeof(struct ip) + optlen;
1099 ip->ip_v = IPVERSION;
1100 ip->ip_hl = *phlen >> 2;
1101 ip->ip_len += optlen;
1102 return (m);
1103 }
1104
1105 /*
1106 * Copy options from ip to jp,
1107 * omitting those not copied during fragmentation.
1108 */
1109 int
1110 ip_optcopy(ip, jp)
1111 struct ip *ip, *jp;
1112 {
1113 register u_char *cp, *dp;
1114 int opt, optlen, cnt;
1115
1116 cp = (u_char *)(ip + 1);
1117 dp = (u_char *)(jp + 1);
1118 cnt = (ip->ip_hl << 2) - sizeof (struct ip);
1119 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1120 opt = cp[0];
1121 if (opt == IPOPT_EOL)
1122 break;
1123 if (opt == IPOPT_NOP) {
1124 /* Preserve for IP mcast tunnel's LSRR alignment. */
1125 *dp++ = IPOPT_NOP;
1126 optlen = 1;
1127 continue;
1128 }
1129
1130 KASSERT(cnt >= IPOPT_OLEN + sizeof(*cp),
1131 ("ip_optcopy: malformed ipv4 option"));
1132 optlen = cp[IPOPT_OLEN];
1133 KASSERT(optlen >= IPOPT_OLEN + sizeof(*cp) && optlen <= cnt,
1134 ("ip_optcopy: malformed ipv4 option"));
1135
1136 /* bogus lengths should have been caught by ip_dooptions */
1137 if (optlen > cnt)
1138 optlen = cnt;
1139 if (IPOPT_COPIED(opt)) {
1140 bcopy(cp, dp, optlen);
1141 dp += optlen;
1142 }
1143 }
1144 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
1145 *dp++ = IPOPT_EOL;
1146 return (optlen);
1147 }
1148
1149 /*
1150 * IP socket option processing.
1151 *
1152 * There are two versions of this call in order to work around a race
1153 * condition in TCP in FreeBSD 6.x. In the TCP implementation, so->so_pcb
1154 * can become NULL if the pcb or pcbinfo lock isn't held. However, when
1155 * entering ip_ctloutput(), neither lock is held, and finding the pointer to
1156 * either lock requires follow so->so_pcb, which may be NULL.
1157 * ip_ctloutput_pcbinfo() accepts the pcbinfo pointer so that the lock can be
1158 * safely acquired. This is not required in FreeBSD 7.x because the
1159 * invariants on so->so_pcb are much stronger, so it cannot become NULL
1160 * while the socket is in use.
1161 */
1162 int
1163 ip_ctloutput_pcbinfo(so, sopt, pcbinfo)
1164 struct socket *so;
1165 struct sockopt *sopt;
1166 struct inpcbinfo *pcbinfo;
1167 {
1168 struct inpcb *inp = sotoinpcb(so);
1169 int error, optval;
1170
1171 if (pcbinfo == NULL)
1172 pcbinfo = inp->inp_pcbinfo;
1173
1174 error = optval = 0;
1175 if (sopt->sopt_level != IPPROTO_IP) {
1176 return (EINVAL);
1177 }
1178
1179 if (inp == NULL)
1180 return (EINVAL);
1181
1182 switch (sopt->sopt_dir) {
1183 case SOPT_SET:
1184 switch (sopt->sopt_name) {
1185 case IP_OPTIONS:
1186 #ifdef notyet
1187 case IP_RETOPTS:
1188 #endif
1189 {
1190 struct mbuf *m;
1191 if (sopt->sopt_valsize > MLEN) {
1192 error = EMSGSIZE;
1193 break;
1194 }
1195 MGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT, MT_DATA);
1196 if (m == NULL) {
1197 error = ENOBUFS;
1198 break;
1199 }
1200 m->m_len = sopt->sopt_valsize;
1201 error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
1202 m->m_len);
1203 if (error) {
1204 m_free(m);
1205 break;
1206 }
1207 INP_INFO_WLOCK(pcbinfo);
1208 if (so->so_pcb == NULL) {
1209 INP_INFO_WUNLOCK(pcbinfo);
1210 m_free(m);
1211 error = EINVAL;
1212 break;
1213 }
1214 INP_LOCK(inp);
1215 INP_INFO_WUNLOCK(pcbinfo);
1216 error = ip_pcbopts(inp, sopt->sopt_name, m);
1217 INP_UNLOCK(inp);
1218 return (error);
1219 }
1220
1221 case IP_TOS:
1222 case IP_TTL:
1223 case IP_MINTTL:
1224 case IP_RECVOPTS:
1225 case IP_RECVRETOPTS:
1226 case IP_RECVDSTADDR:
1227 case IP_RECVTTL:
1228 case IP_RECVIF:
1229 case IP_FAITH:
1230 case IP_ONESBCAST:
1231 case IP_DONTFRAG:
1232 error = sooptcopyin(sopt, &optval, sizeof optval,
1233 sizeof optval);
1234 if (error)
1235 break;
1236
1237 INP_INFO_WLOCK(pcbinfo);
1238 if (so->so_pcb == NULL) {
1239 INP_INFO_WUNLOCK(pcbinfo);
1240 error = EINVAL;
1241 break;
1242 }
1243 INP_LOCK(inp);
1244 INP_INFO_WUNLOCK(pcbinfo);
1245 switch (sopt->sopt_name) {
1246 case IP_TOS:
1247 inp->inp_ip_tos = optval;
1248 break;
1249
1250 case IP_TTL:
1251 inp->inp_ip_ttl = optval;
1252 break;
1253
1254 case IP_MINTTL:
1255 if (optval > 0 && optval <= MAXTTL)
1256 inp->inp_ip_minttl = optval;
1257 else
1258 error = EINVAL;
1259 break;
1260
1261 #define OPTSET(bit) do { \
1262 INP_LOCK(inp); \
1263 if (optval) \
1264 inp->inp_flags |= bit; \
1265 else \
1266 inp->inp_flags &= ~bit; \
1267 INP_UNLOCK(inp); \
1268 } while (0)
1269
1270 case IP_RECVOPTS:
1271 OPTSET(INP_RECVOPTS);
1272 break;
1273
1274 case IP_RECVRETOPTS:
1275 OPTSET(INP_RECVRETOPTS);
1276 break;
1277
1278 case IP_RECVDSTADDR:
1279 OPTSET(INP_RECVDSTADDR);
1280 break;
1281
1282 case IP_RECVTTL:
1283 OPTSET(INP_RECVTTL);
1284 break;
1285
1286 case IP_RECVIF:
1287 OPTSET(INP_RECVIF);
1288 break;
1289
1290 case IP_FAITH:
1291 OPTSET(INP_FAITH);
1292 break;
1293
1294 case IP_ONESBCAST:
1295 OPTSET(INP_ONESBCAST);
1296 break;
1297 case IP_DONTFRAG:
1298 OPTSET(INP_DONTFRAG);
1299 break;
1300 }
1301 INP_UNLOCK(inp);
1302 break;
1303 #undef OPTSET
1304
1305 case IP_MULTICAST_IF:
1306 case IP_MULTICAST_VIF:
1307 case IP_MULTICAST_TTL:
1308 case IP_MULTICAST_LOOP:
1309 case IP_ADD_MEMBERSHIP:
1310 case IP_DROP_MEMBERSHIP:
1311 error = ip_setmoptions(inp, sopt);
1312 break;
1313
1314 case IP_PORTRANGE:
1315 error = sooptcopyin(sopt, &optval, sizeof optval,
1316 sizeof optval);
1317 if (error)
1318 break;
1319
1320 INP_INFO_WLOCK(pcbinfo);
1321 if (so->so_pcb == NULL) {
1322 error = EINVAL;
1323 break;
1324 }
1325 INP_LOCK(inp);
1326 INP_INFO_WUNLOCK(pcbinfo);
1327 switch (optval) {
1328 case IP_PORTRANGE_DEFAULT:
1329 inp->inp_flags &= ~(INP_LOWPORT);
1330 inp->inp_flags &= ~(INP_HIGHPORT);
1331 break;
1332
1333 case IP_PORTRANGE_HIGH:
1334 inp->inp_flags &= ~(INP_LOWPORT);
1335 inp->inp_flags |= INP_HIGHPORT;
1336 break;
1337
1338 case IP_PORTRANGE_LOW:
1339 inp->inp_flags &= ~(INP_HIGHPORT);
1340 inp->inp_flags |= INP_LOWPORT;
1341 break;
1342
1343 default:
1344 error = EINVAL;
1345 break;
1346 }
1347 INP_UNLOCK(inp);
1348 break;
1349
1350 #if defined(IPSEC) || defined(FAST_IPSEC)
1351 case IP_IPSEC_POLICY:
1352 {
1353 caddr_t req;
1354 size_t len = 0;
1355 int priv;
1356 struct mbuf *m;
1357 int optname;
1358
1359 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1360 break;
1361 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1362 break;
1363 priv = (sopt->sopt_td != NULL &&
1364 suser(sopt->sopt_td) != 0) ? 0 : 1;
1365 req = mtod(m, caddr_t);
1366 len = m->m_len;
1367 optname = sopt->sopt_name;
1368 if (so->so_pcb == NULL) {
1369 m_free(m);
1370 error = EINVAL;
1371 break;
1372 }
1373 error = ipsec4_set_policy(inp, optname, req, len, priv);
1374 m_freem(m);
1375 break;
1376 }
1377 #endif /*IPSEC*/
1378
1379 default:
1380 error = ENOPROTOOPT;
1381 break;
1382 }
1383 break;
1384
1385 case SOPT_GET:
1386 switch (sopt->sopt_name) {
1387 case IP_OPTIONS:
1388 case IP_RETOPTS:
1389 if (inp->inp_options)
1390 error = sooptcopyout(sopt,
1391 mtod(inp->inp_options,
1392 char *),
1393 inp->inp_options->m_len);
1394 else
1395 sopt->sopt_valsize = 0;
1396 break;
1397
1398 case IP_TOS:
1399 case IP_TTL:
1400 case IP_MINTTL:
1401 case IP_RECVOPTS:
1402 case IP_RECVRETOPTS:
1403 case IP_RECVDSTADDR:
1404 case IP_RECVTTL:
1405 case IP_RECVIF:
1406 case IP_PORTRANGE:
1407 case IP_FAITH:
1408 case IP_ONESBCAST:
1409 case IP_DONTFRAG:
1410 switch (sopt->sopt_name) {
1411
1412 case IP_TOS:
1413 optval = inp->inp_ip_tos;
1414 break;
1415
1416 case IP_TTL:
1417 optval = inp->inp_ip_ttl;
1418 break;
1419
1420 case IP_MINTTL:
1421 optval = inp->inp_ip_minttl;
1422 break;
1423
1424 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
1425
1426 case IP_RECVOPTS:
1427 optval = OPTBIT(INP_RECVOPTS);
1428 break;
1429
1430 case IP_RECVRETOPTS:
1431 optval = OPTBIT(INP_RECVRETOPTS);
1432 break;
1433
1434 case IP_RECVDSTADDR:
1435 optval = OPTBIT(INP_RECVDSTADDR);
1436 break;
1437
1438 case IP_RECVTTL:
1439 optval = OPTBIT(INP_RECVTTL);
1440 break;
1441
1442 case IP_RECVIF:
1443 optval = OPTBIT(INP_RECVIF);
1444 break;
1445
1446 case IP_PORTRANGE:
1447 if (inp->inp_flags & INP_HIGHPORT)
1448 optval = IP_PORTRANGE_HIGH;
1449 else if (inp->inp_flags & INP_LOWPORT)
1450 optval = IP_PORTRANGE_LOW;
1451 else
1452 optval = 0;
1453 break;
1454
1455 case IP_FAITH:
1456 optval = OPTBIT(INP_FAITH);
1457 break;
1458
1459 case IP_ONESBCAST:
1460 optval = OPTBIT(INP_ONESBCAST);
1461 break;
1462 case IP_DONTFRAG:
1463 optval = OPTBIT(INP_DONTFRAG);
1464 break;
1465 }
1466 error = sooptcopyout(sopt, &optval, sizeof optval);
1467 break;
1468
1469 case IP_MULTICAST_IF:
1470 case IP_MULTICAST_VIF:
1471 case IP_MULTICAST_TTL:
1472 case IP_MULTICAST_LOOP:
1473 case IP_ADD_MEMBERSHIP:
1474 case IP_DROP_MEMBERSHIP:
1475 error = ip_getmoptions(inp, sopt);
1476 break;
1477
1478 #if defined(IPSEC) || defined(FAST_IPSEC)
1479 case IP_IPSEC_POLICY:
1480 {
1481 struct mbuf *m = NULL;
1482 caddr_t req = NULL;
1483 size_t len = 0;
1484
1485 if (m != 0) {
1486 req = mtod(m, caddr_t);
1487 len = m->m_len;
1488 }
1489 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m);
1490 if (error == 0)
1491 error = soopt_mcopyout(sopt, m); /* XXX */
1492 if (error == 0)
1493 m_freem(m);
1494 break;
1495 }
1496 #endif /*IPSEC*/
1497
1498 default:
1499 error = ENOPROTOOPT;
1500 break;
1501 }
1502 break;
1503 }
1504 return (error);
1505 }
1506
1507 int
1508 ip_ctloutput(so, sopt)
1509 struct socket *so;
1510 struct sockopt *sopt;
1511 {
1512
1513 return (ip_ctloutput_pcbinfo(so, sopt, NULL));
1514 }
1515
1516 /*
1517 * Set up IP options in pcb for insertion in output packets.
1518 * Store in mbuf with pointer in pcbopt, adding pseudo-option
1519 * with destination address if source routed.
1520 */
1521 static int
1522 ip_pcbopts(struct inpcb *inp, int optname, struct mbuf *m)
1523 {
1524 register int cnt, optlen;
1525 register u_char *cp;
1526 struct mbuf **pcbopt;
1527 u_char opt;
1528
1529 INP_LOCK_ASSERT(inp);
1530
1531 pcbopt = &inp->inp_options;
1532
1533 /* turn off any old options */
1534 if (*pcbopt)
1535 (void)m_free(*pcbopt);
1536 *pcbopt = 0;
1537 if (m == NULL || m->m_len == 0) {
1538 /*
1539 * Only turning off any previous options.
1540 */
1541 if (m != NULL)
1542 (void)m_free(m);
1543 return (0);
1544 }
1545
1546 if (m->m_len % sizeof(int32_t))
1547 goto bad;
1548 /*
1549 * IP first-hop destination address will be stored before
1550 * actual options; move other options back
1551 * and clear it when none present.
1552 */
1553 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
1554 goto bad;
1555 cnt = m->m_len;
1556 m->m_len += sizeof(struct in_addr);
1557 cp = mtod(m, u_char *) + sizeof(struct in_addr);
1558 bcopy(mtod(m, void *), cp, (unsigned)cnt);
1559 bzero(mtod(m, void *), sizeof(struct in_addr));
1560
1561 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1562 opt = cp[IPOPT_OPTVAL];
1563 if (opt == IPOPT_EOL)
1564 break;
1565 if (opt == IPOPT_NOP)
1566 optlen = 1;
1567 else {
1568 if (cnt < IPOPT_OLEN + sizeof(*cp))
1569 goto bad;
1570 optlen = cp[IPOPT_OLEN];
1571 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
1572 goto bad;
1573 }
1574 switch (opt) {
1575
1576 default:
1577 break;
1578
1579 case IPOPT_LSRR:
1580 case IPOPT_SSRR:
1581 /*
1582 * user process specifies route as:
1583 * ->A->B->C->D
1584 * D must be our final destination (but we can't
1585 * check that since we may not have connected yet).
1586 * A is first hop destination, which doesn't appear in
1587 * actual IP option, but is stored before the options.
1588 */
1589 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
1590 goto bad;
1591 m->m_len -= sizeof(struct in_addr);
1592 cnt -= sizeof(struct in_addr);
1593 optlen -= sizeof(struct in_addr);
1594 cp[IPOPT_OLEN] = optlen;
1595 /*
1596 * Move first hop before start of options.
1597 */
1598 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
1599 sizeof(struct in_addr));
1600 /*
1601 * Then copy rest of options back
1602 * to close up the deleted entry.
1603 */
1604 bcopy((&cp[IPOPT_OFFSET+1] + sizeof(struct in_addr)),
1605 &cp[IPOPT_OFFSET+1],
1606 (unsigned)cnt - (IPOPT_MINOFF - 1));
1607 break;
1608 }
1609 }
1610 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
1611 goto bad;
1612 *pcbopt = m;
1613 return (0);
1614
1615 bad:
1616 (void)m_free(m);
1617 return (EINVAL);
1618 }
1619
1620 /*
1621 * XXX
1622 * The whole multicast option thing needs to be re-thought.
1623 * Several of these options are equally applicable to non-multicast
1624 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
1625 * standard option (IP_TTL).
1626 */
1627
1628 /*
1629 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
1630 */
1631 static struct ifnet *
1632 ip_multicast_if(a, ifindexp)
1633 struct in_addr *a;
1634 int *ifindexp;
1635 {
1636 int ifindex;
1637 struct ifnet *ifp;
1638
1639 if (ifindexp)
1640 *ifindexp = 0;
1641 if (ntohl(a->s_addr) >> 24 == 0) {
1642 ifindex = ntohl(a->s_addr) & 0xffffff;
1643 if (ifindex < 0 || if_index < ifindex)
1644 return NULL;
1645 ifp = ifnet_byindex(ifindex);
1646 if (ifindexp)
1647 *ifindexp = ifindex;
1648 } else {
1649 INADDR_TO_IFP(*a, ifp);
1650 }
1651 return ifp;
1652 }
1653
1654 /*
1655 * Given an inpcb, return its multicast options structure pointer. Accepts
1656 * an unlocked inpcb pointer, but will return it locked. May sleep.
1657 */
1658 static struct ip_moptions *
1659 ip_findmoptions(struct inpcb *inp)
1660 {
1661 struct ip_moptions *imo;
1662
1663 INP_LOCK(inp);
1664 if (inp->inp_moptions != NULL)
1665 return (inp->inp_moptions);
1666
1667 INP_UNLOCK(inp);
1668
1669 imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK);
1670
1671 imo->imo_multicast_ifp = NULL;
1672 imo->imo_multicast_addr.s_addr = INADDR_ANY;
1673 imo->imo_multicast_vif = -1;
1674 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1675 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1676 imo->imo_num_memberships = 0;
1677
1678 INP_LOCK(inp);
1679 if (inp->inp_moptions != NULL) {
1680 free(imo, M_IPMOPTS);
1681 return (inp->inp_moptions);
1682 }
1683 inp->inp_moptions = imo;
1684 return (imo);
1685 }
1686
1687 /*
1688 * Set the IP multicast options in response to user setsockopt().
1689 */
1690 static int
1691 ip_setmoptions(struct inpcb *inp, struct sockopt *sopt)
1692 {
1693 int error = 0;
1694 int i;
1695 struct in_addr addr;
1696 struct ip_mreq mreq;
1697 struct ifnet *ifp;
1698 struct ip_moptions *imo;
1699 struct route ro;
1700 struct sockaddr_in *dst;
1701 int ifindex;
1702 int s;
1703
1704 switch (sopt->sopt_name) {
1705 /* store an index number for the vif you wanna use in the send */
1706 case IP_MULTICAST_VIF:
1707 if (legal_vif_num == 0) {
1708 error = EOPNOTSUPP;
1709 break;
1710 }
1711 error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
1712 if (error)
1713 break;
1714 if (!legal_vif_num(i) && (i != -1)) {
1715 error = EINVAL;
1716 break;
1717 }
1718 imo = ip_findmoptions(inp);
1719 imo->imo_multicast_vif = i;
1720 INP_UNLOCK(inp);
1721 break;
1722
1723 case IP_MULTICAST_IF:
1724 /*
1725 * Select the interface for outgoing multicast packets.
1726 */
1727 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
1728 if (error)
1729 break;
1730 /*
1731 * INADDR_ANY is used to remove a previous selection.
1732 * When no interface is selected, a default one is
1733 * chosen every time a multicast packet is sent.
1734 */
1735 imo = ip_findmoptions(inp);
1736 if (addr.s_addr == INADDR_ANY) {
1737 imo->imo_multicast_ifp = NULL;
1738 INP_UNLOCK(inp);
1739 break;
1740 }
1741 /*
1742 * The selected interface is identified by its local
1743 * IP address. Find the interface and confirm that
1744 * it supports multicasting.
1745 */
1746 s = splimp();
1747 ifp = ip_multicast_if(&addr, &ifindex);
1748 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1749 INP_UNLOCK(inp);
1750 splx(s);
1751 error = EADDRNOTAVAIL;
1752 break;
1753 }
1754 imo->imo_multicast_ifp = ifp;
1755 if (ifindex)
1756 imo->imo_multicast_addr = addr;
1757 else
1758 imo->imo_multicast_addr.s_addr = INADDR_ANY;
1759 INP_UNLOCK(inp);
1760 splx(s);
1761 break;
1762
1763 case IP_MULTICAST_TTL:
1764 /*
1765 * Set the IP time-to-live for outgoing multicast packets.
1766 * The original multicast API required a char argument,
1767 * which is inconsistent with the rest of the socket API.
1768 * We allow either a char or an int.
1769 */
1770 if (sopt->sopt_valsize == 1) {
1771 u_char ttl;
1772 error = sooptcopyin(sopt, &ttl, 1, 1);
1773 if (error)
1774 break;
1775 imo = ip_findmoptions(inp);
1776 imo->imo_multicast_ttl = ttl;
1777 INP_UNLOCK(inp);
1778 } else {
1779 u_int ttl;
1780 error = sooptcopyin(sopt, &ttl, sizeof ttl,
1781 sizeof ttl);
1782 if (error)
1783 break;
1784 if (ttl > 255)
1785 error = EINVAL;
1786 else {
1787 imo = ip_findmoptions(inp);
1788 imo->imo_multicast_ttl = ttl;
1789 INP_UNLOCK(inp);
1790 }
1791 }
1792 break;
1793
1794 case IP_MULTICAST_LOOP:
1795 /*
1796 * Set the loopback flag for outgoing multicast packets.
1797 * Must be zero or one. The original multicast API required a
1798 * char argument, which is inconsistent with the rest
1799 * of the socket API. We allow either a char or an int.
1800 */
1801 if (sopt->sopt_valsize == 1) {
1802 u_char loop;
1803 error = sooptcopyin(sopt, &loop, 1, 1);
1804 if (error)
1805 break;
1806 imo = ip_findmoptions(inp);
1807 imo->imo_multicast_loop = !!loop;
1808 INP_UNLOCK(inp);
1809 } else {
1810 u_int loop;
1811 error = sooptcopyin(sopt, &loop, sizeof loop,
1812 sizeof loop);
1813 if (error)
1814 break;
1815 imo = ip_findmoptions(inp);
1816 imo->imo_multicast_loop = !!loop;
1817 INP_UNLOCK(inp);
1818 }
1819 break;
1820
1821 case IP_ADD_MEMBERSHIP:
1822 /*
1823 * Add a multicast group membership.
1824 * Group must be a valid IP multicast address.
1825 */
1826 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1827 if (error)
1828 break;
1829
1830 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1831 error = EINVAL;
1832 break;
1833 }
1834 s = splimp();
1835 /*
1836 * If no interface address was provided, use the interface of
1837 * the route to the given multicast address.
1838 */
1839 if (mreq.imr_interface.s_addr == INADDR_ANY) {
1840 bzero((caddr_t)&ro, sizeof(ro));
1841 dst = (struct sockaddr_in *)&ro.ro_dst;
1842 dst->sin_len = sizeof(*dst);
1843 dst->sin_family = AF_INET;
1844 dst->sin_addr = mreq.imr_multiaddr;
1845 rtalloc_ign(&ro, RTF_CLONING);
1846 if (ro.ro_rt == NULL) {
1847 error = EADDRNOTAVAIL;
1848 splx(s);
1849 break;
1850 }
1851 ifp = ro.ro_rt->rt_ifp;
1852 RTFREE(ro.ro_rt);
1853 }
1854 else {
1855 ifp = ip_multicast_if(&mreq.imr_interface, NULL);
1856 }
1857
1858 /*
1859 * See if we found an interface, and confirm that it
1860 * supports multicast.
1861 */
1862 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1863 error = EADDRNOTAVAIL;
1864 splx(s);
1865 break;
1866 }
1867 /*
1868 * See if the membership already exists or if all the
1869 * membership slots are full.
1870 */
1871 imo = ip_findmoptions(inp);
1872 for (i = 0; i < imo->imo_num_memberships; ++i) {
1873 if (imo->imo_membership[i]->inm_ifp == ifp &&
1874 imo->imo_membership[i]->inm_addr.s_addr
1875 == mreq.imr_multiaddr.s_addr)
1876 break;
1877 }
1878 if (i < imo->imo_num_memberships) {
1879 INP_UNLOCK(inp);
1880 error = EADDRINUSE;
1881 splx(s);
1882 break;
1883 }
1884 if (i == IP_MAX_MEMBERSHIPS) {
1885 INP_UNLOCK(inp);
1886 error = ETOOMANYREFS;
1887 splx(s);
1888 break;
1889 }
1890 /*
1891 * Everything looks good; add a new record to the multicast
1892 * address list for the given interface.
1893 */
1894 if ((imo->imo_membership[i] =
1895 in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) {
1896 INP_UNLOCK(inp);
1897 error = ENOBUFS;
1898 splx(s);
1899 break;
1900 }
1901 ++imo->imo_num_memberships;
1902 INP_UNLOCK(inp);
1903 splx(s);
1904 break;
1905
1906 case IP_DROP_MEMBERSHIP:
1907 /*
1908 * Drop a multicast group membership.
1909 * Group must be a valid IP multicast address.
1910 */
1911 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1912 if (error)
1913 break;
1914
1915 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1916 error = EINVAL;
1917 break;
1918 }
1919
1920 s = splimp();
1921 /*
1922 * If an interface address was specified, get a pointer
1923 * to its ifnet structure.
1924 */
1925 if (mreq.imr_interface.s_addr == INADDR_ANY)
1926 ifp = NULL;
1927 else {
1928 ifp = ip_multicast_if(&mreq.imr_interface, NULL);
1929 if (ifp == NULL) {
1930 error = EADDRNOTAVAIL;
1931 splx(s);
1932 break;
1933 }
1934 }
1935 /*
1936 * Find the membership in the membership array.
1937 */
1938 imo = ip_findmoptions(inp);
1939 for (i = 0; i < imo->imo_num_memberships; ++i) {
1940 if ((ifp == NULL ||
1941 imo->imo_membership[i]->inm_ifp == ifp) &&
1942 imo->imo_membership[i]->inm_addr.s_addr ==
1943 mreq.imr_multiaddr.s_addr)
1944 break;
1945 }
1946 if (i == imo->imo_num_memberships) {
1947 INP_UNLOCK(inp);
1948 error = EADDRNOTAVAIL;
1949 splx(s);
1950 break;
1951 }
1952 /*
1953 * Give up the multicast address record to which the
1954 * membership points.
1955 */
1956 in_delmulti(imo->imo_membership[i]);
1957 /*
1958 * Remove the gap in the membership array.
1959 */
1960 for (++i; i < imo->imo_num_memberships; ++i)
1961 imo->imo_membership[i-1] = imo->imo_membership[i];
1962 --imo->imo_num_memberships;
1963 INP_UNLOCK(inp);
1964 splx(s);
1965 break;
1966
1967 default:
1968 error = EOPNOTSUPP;
1969 break;
1970 }
1971
1972 return (error);
1973 }
1974
1975 /*
1976 * Return the IP multicast options in response to user getsockopt().
1977 */
1978 static int
1979 ip_getmoptions(struct inpcb *inp, struct sockopt *sopt)
1980 {
1981 struct ip_moptions *imo;
1982 struct in_addr addr;
1983 struct in_ifaddr *ia;
1984 int error, optval;
1985 u_char coptval;
1986
1987 INP_LOCK(inp);
1988 imo = inp->inp_moptions;
1989
1990 error = 0;
1991 switch (sopt->sopt_name) {
1992 case IP_MULTICAST_VIF:
1993 if (imo != NULL)
1994 optval = imo->imo_multicast_vif;
1995 else
1996 optval = -1;
1997 INP_UNLOCK(inp);
1998 error = sooptcopyout(sopt, &optval, sizeof optval);
1999 break;
2000
2001 case IP_MULTICAST_IF:
2002 if (imo == NULL || imo->imo_multicast_ifp == NULL)
2003 addr.s_addr = INADDR_ANY;
2004 else if (imo->imo_multicast_addr.s_addr) {
2005 /* return the value user has set */
2006 addr = imo->imo_multicast_addr;
2007 } else {
2008 IFP_TO_IA(imo->imo_multicast_ifp, ia);
2009 addr.s_addr = (ia == NULL) ? INADDR_ANY
2010 : IA_SIN(ia)->sin_addr.s_addr;
2011 }
2012 INP_UNLOCK(inp);
2013 error = sooptcopyout(sopt, &addr, sizeof addr);
2014 break;
2015
2016 case IP_MULTICAST_TTL:
2017 if (imo == 0)
2018 optval = coptval = IP_DEFAULT_MULTICAST_TTL;
2019 else
2020 optval = coptval = imo->imo_multicast_ttl;
2021 INP_UNLOCK(inp);
2022 if (sopt->sopt_valsize == 1)
2023 error = sooptcopyout(sopt, &coptval, 1);
2024 else
2025 error = sooptcopyout(sopt, &optval, sizeof optval);
2026 break;
2027
2028 case IP_MULTICAST_LOOP:
2029 if (imo == 0)
2030 optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
2031 else
2032 optval = coptval = imo->imo_multicast_loop;
2033 INP_UNLOCK(inp);
2034 if (sopt->sopt_valsize == 1)
2035 error = sooptcopyout(sopt, &coptval, 1);
2036 else
2037 error = sooptcopyout(sopt, &optval, sizeof optval);
2038 break;
2039
2040 default:
2041 INP_UNLOCK(inp);
2042 error = ENOPROTOOPT;
2043 break;
2044 }
2045 INP_UNLOCK_ASSERT(inp);
2046
2047 return (error);
2048 }
2049
2050 /*
2051 * Discard the IP multicast options.
2052 */
2053 void
2054 ip_freemoptions(imo)
2055 register struct ip_moptions *imo;
2056 {
2057 register int i;
2058
2059 if (imo != NULL) {
2060 for (i = 0; i < imo->imo_num_memberships; ++i)
2061 in_delmulti(imo->imo_membership[i]);
2062 free(imo, M_IPMOPTS);
2063 }
2064 }
2065
2066 /*
2067 * Routine called from ip_output() to loop back a copy of an IP multicast
2068 * packet to the input queue of a specified interface. Note that this
2069 * calls the output routine of the loopback "driver", but with an interface
2070 * pointer that might NOT be a loopback interface -- evil, but easier than
2071 * replicating that code here.
2072 */
2073 static void
2074 ip_mloopback(ifp, m, dst, hlen)
2075 struct ifnet *ifp;
2076 register struct mbuf *m;
2077 register struct sockaddr_in *dst;
2078 int hlen;
2079 {
2080 register struct ip *ip;
2081 struct mbuf *copym;
2082
2083 copym = m_copy(m, 0, M_COPYALL);
2084 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
2085 copym = m_pullup(copym, hlen);
2086 if (copym != NULL) {
2087 /* If needed, compute the checksum and mark it as valid. */
2088 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
2089 in_delayed_cksum(copym);
2090 copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
2091 copym->m_pkthdr.csum_flags |=
2092 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
2093 copym->m_pkthdr.csum_data = 0xffff;
2094 }
2095 /*
2096 * We don't bother to fragment if the IP length is greater
2097 * than the interface's MTU. Can this possibly matter?
2098 */
2099 ip = mtod(copym, struct ip *);
2100 ip->ip_len = htons(ip->ip_len);
2101 ip->ip_off = htons(ip->ip_off);
2102 ip->ip_sum = 0;
2103 ip->ip_sum = in_cksum(copym, hlen);
2104 /*
2105 * NB:
2106 * It's not clear whether there are any lingering
2107 * reentrancy problems in other areas which might
2108 * be exposed by using ip_input directly (in
2109 * particular, everything which modifies the packet
2110 * in-place). Yet another option is using the
2111 * protosw directly to deliver the looped back
2112 * packet. For the moment, we'll err on the side
2113 * of safety by using if_simloop().
2114 */
2115 #if 1 /* XXX */
2116 if (dst->sin_family != AF_INET) {
2117 printf("ip_mloopback: bad address family %d\n",
2118 dst->sin_family);
2119 dst->sin_family = AF_INET;
2120 }
2121 #endif
2122
2123 #ifdef notdef
2124 copym->m_pkthdr.rcvif = ifp;
2125 ip_input(copym);
2126 #else
2127 if_simloop(ifp, copym, dst->sin_family, 0);
2128 #endif
2129 }
2130 }
Cache object: 5a597cf49c40f9126299405468b4895d
|