1 /*-
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
30 * $FreeBSD$
31 */
32
33 #include "opt_ipfw.h"
34 #include "opt_ipsec.h"
35 #include "opt_mac.h"
36 #include "opt_mbuf_stress_test.h"
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/kernel.h>
41 #include <sys/mac.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/protosw.h>
45 #include <sys/socket.h>
46 #include <sys/socketvar.h>
47 #include <sys/sysctl.h>
48
49 #include <net/if.h>
50 #include <net/netisr.h>
51 #include <net/pfil.h>
52 #include <net/route.h>
53
54 #include <netinet/in.h>
55 #include <netinet/in_systm.h>
56 #include <netinet/ip.h>
57 #include <netinet/in_pcb.h>
58 #include <netinet/in_var.h>
59 #include <netinet/ip_var.h>
60
61 #include <machine/in_cksum.h>
62
63 static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options");
64
65 #ifdef IPSEC
66 #include <netinet6/ipsec.h>
67 #include <netkey/key.h>
68 #ifdef IPSEC_DEBUG
69 #include <netkey/key_debug.h>
70 #else
71 #define KEYDEBUG(lev,arg)
72 #endif
73 #endif /*IPSEC*/
74
75 #ifdef FAST_IPSEC
76 #include <netipsec/ipsec.h>
77 #include <netipsec/xform.h>
78 #include <netipsec/key.h>
79 #endif /*FAST_IPSEC*/
80
/*
 * Debug helper: printf() the string x, then the address `a' (any object
 * with an s_addr member) as a dotted quad after converting it with
 * ntohl(), then the string y.
 *
 * NOTE: the expansion already ends in a semicolon and evaluates `a'
 * four times, so pass a side-effect-free expression.
 */
#define print_ip(x, a, y)	 printf("%s %d.%d.%d.%d%s",\
				x, (ntohl(a.s_addr)>>24)&0xFF,\
				(ntohl(a.s_addr)>>16)&0xFF,\
				(ntohl(a.s_addr)>>8)&0xFF,\
				(ntohl(a.s_addr))&0xFF, y);
86
/*
 * NOTE(review): presumably the global IP identification state; it is not
 * referenced directly by the code visible in this part of the file
 * (ip_output() obtains IDs through ip_newid()) -- confirm against users.
 */
u_short ip_id;

#ifdef MBUF_STRESS_TEST
/*
 * Stress-test knob: when non-zero, ip_output() passes any unfragmented
 * outgoing packet longer than this many bytes through m_fragment() before
 * handing it to the interface.  Exported read/write via SYSCTL_INT below.
 */
int mbuf_frag_size = 0;
SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW,
	&mbuf_frag_size, 0, "Fragment outgoing mbufs to this size");
#endif

/* Forward declarations of helpers private to this file. */
static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
static struct ifnet *ip_multicast_if(struct in_addr *, int *);
static void	ip_mloopback
	(struct ifnet *, struct mbuf *, struct sockaddr_in *, int);
static int	ip_getmoptions(struct inpcb *, struct sockopt *);
static int	ip_pcbopts(struct inpcb *, int, struct mbuf *);
static int	ip_setmoptions(struct inpcb *, struct sockopt *);
static struct ip_moptions	*ip_findmoptions(struct inpcb *inp);

/* Not static: copies the options that survive fragmentation (see below). */
int	ip_optcopy(struct ip *, struct ip *);


/* Protocol switch table; defined outside this file. */
extern	struct protosw inetsw[];
108
109 /*
110 * IP output. The packet in mbuf chain m contains a skeletal IP
111 * header (with len, off, ttl, proto, tos, src, dst).
112 * The mbuf chain containing the packet will be freed.
113 * The mbuf opt, if present, will not be freed.
114 * In the IP forwarding case, the packet will arrive with options already
115 * inserted, so must have a NULL opt pointer.
116 */
117 int
118 ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro,
119 int flags, struct ip_moptions *imo, struct inpcb *inp)
120 {
121 struct ip *ip;
122 struct ifnet *ifp = NULL; /* keep compiler happy */
123 struct mbuf *m0;
124 int hlen = sizeof (struct ip);
125 int len, error = 0;
126 struct sockaddr_in *dst = NULL; /* keep compiler happy */
127 struct in_ifaddr *ia = NULL;
128 int isbroadcast, sw_csum;
129 struct route iproute;
130 struct in_addr odst;
131 #ifdef IPFIREWALL_FORWARD
132 struct m_tag *fwd_tag = NULL;
133 #endif
134 #ifdef IPSEC
135 struct secpolicy *sp = NULL;
136 #endif
137 #ifdef FAST_IPSEC
138 struct secpolicy *sp = NULL;
139 struct tdb_ident *tdbi;
140 struct m_tag *mtag;
141 int s;
142 #endif /* FAST_IPSEC */
143
144 M_ASSERTPKTHDR(m);
145
146 if (ro == NULL) {
147 ro = &iproute;
148 bzero(ro, sizeof (*ro));
149 }
150
151 if (inp != NULL)
152 INP_LOCK_ASSERT(inp);
153
154 if (opt) {
155 len = 0;
156 m = ip_insertoptions(m, opt, &len);
157 if (len != 0)
158 hlen = len;
159 }
160 ip = mtod(m, struct ip *);
161
162 /*
163 * Fill in IP header. If we are not allowing fragmentation,
164 * then the ip_id field is meaningless, but we don't set it
165 * to zero. Doing so causes various problems when devices along
166 * the path (routers, load balancers, firewalls, etc.) illegally
167 * disable DF on our packet. Note that a 16-bit counter
168 * will wrap around in less than 10 seconds at 100 Mbit/s on a
169 * medium with MTU 1500. See Steven M. Bellovin, "A Technique
170 * for Counting NATted Hosts", Proc. IMW'02, available at
171 * <http://www.cs.columbia.edu/~smb/papers/fnat.pdf>.
172 */
173 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
174 ip->ip_v = IPVERSION;
175 ip->ip_hl = hlen >> 2;
176 ip->ip_id = ip_newid();
177 ipstat.ips_localout++;
178 } else {
179 hlen = ip->ip_hl << 2;
180 }
181
182 dst = (struct sockaddr_in *)&ro->ro_dst;
183 again:
184 /*
185 * If there is a cached route,
186 * check that it is to the same destination
187 * and is still up. If not, free it and try again.
188 * The address family should also be checked in case of sharing the
189 * cache with IPv6.
190 */
191 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
192 dst->sin_family != AF_INET ||
193 dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
194 RTFREE(ro->ro_rt);
195 ro->ro_rt = (struct rtentry *)0;
196 }
197 #ifdef IPFIREWALL_FORWARD
198 if (ro->ro_rt == NULL && fwd_tag == NULL) {
199 #else
200 if (ro->ro_rt == NULL) {
201 #endif
202 bzero(dst, sizeof(*dst));
203 dst->sin_family = AF_INET;
204 dst->sin_len = sizeof(*dst);
205 dst->sin_addr = ip->ip_dst;
206 }
207 /*
208 * If routing to interface only, short circuit routing lookup.
209 * The use of an all-ones broadcast address implies this; an
210 * interface is specified by the broadcast address of an interface,
211 * or the destination address of a ptp interface.
212 */
213 if (flags & IP_SENDONES) {
214 if ((ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst)))) == NULL &&
215 (ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL) {
216 ipstat.ips_noroute++;
217 error = ENETUNREACH;
218 goto bad;
219 }
220 ip->ip_dst.s_addr = INADDR_BROADCAST;
221 dst->sin_addr = ip->ip_dst;
222 ifp = ia->ia_ifp;
223 ip->ip_ttl = 1;
224 isbroadcast = 1;
225 } else if (flags & IP_ROUTETOIF) {
226 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL &&
227 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == NULL) {
228 ipstat.ips_noroute++;
229 error = ENETUNREACH;
230 goto bad;
231 }
232 ifp = ia->ia_ifp;
233 ip->ip_ttl = 1;
234 isbroadcast = in_broadcast(dst->sin_addr, ifp);
235 } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
236 imo != NULL && imo->imo_multicast_ifp != NULL) {
237 /*
238 * Bypass the normal routing lookup for multicast
239 * packets if the interface is specified.
240 */
241 ifp = imo->imo_multicast_ifp;
242 IFP_TO_IA(ifp, ia);
243 isbroadcast = 0; /* fool gcc */
244 } else {
245 /*
246 * We want to do any cloning requested by the link layer,
247 * as this is probably required in all cases for correct
248 * operation (as it is for ARP).
249 */
250 if (ro->ro_rt == NULL)
251 rtalloc_ign(ro, 0);
252 if (ro->ro_rt == NULL) {
253 ipstat.ips_noroute++;
254 error = EHOSTUNREACH;
255 goto bad;
256 }
257 ia = ifatoia(ro->ro_rt->rt_ifa);
258 ifp = ro->ro_rt->rt_ifp;
259 ro->ro_rt->rt_rmx.rmx_pksent++;
260 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
261 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
262 if (ro->ro_rt->rt_flags & RTF_HOST)
263 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
264 else
265 isbroadcast = in_broadcast(dst->sin_addr, ifp);
266 }
267 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
268 struct in_multi *inm;
269
270 m->m_flags |= M_MCAST;
271 /*
272 * IP destination address is multicast. Make sure "dst"
273 * still points to the address in "ro". (It may have been
274 * changed to point to a gateway address, above.)
275 */
276 dst = (struct sockaddr_in *)&ro->ro_dst;
277 /*
278 * See if the caller provided any multicast options
279 */
280 if (imo != NULL) {
281 ip->ip_ttl = imo->imo_multicast_ttl;
282 if (imo->imo_multicast_vif != -1)
283 ip->ip_src.s_addr =
284 ip_mcast_src ?
285 ip_mcast_src(imo->imo_multicast_vif) :
286 INADDR_ANY;
287 } else
288 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
289 /*
290 * Confirm that the outgoing interface supports multicast.
291 */
292 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
293 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
294 ipstat.ips_noroute++;
295 error = ENETUNREACH;
296 goto bad;
297 }
298 }
299 /*
300 * If source address not specified yet, use address
301 * of outgoing interface.
302 */
303 if (ip->ip_src.s_addr == INADDR_ANY) {
304 /* Interface may have no addresses. */
305 if (ia != NULL)
306 ip->ip_src = IA_SIN(ia)->sin_addr;
307 }
308
309 IN_MULTI_LOCK();
310 IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
311 if (inm != NULL &&
312 (imo == NULL || imo->imo_multicast_loop)) {
313 IN_MULTI_UNLOCK();
314 /*
315 * If we belong to the destination multicast group
316 * on the outgoing interface, and the caller did not
317 * forbid loopback, loop back a copy.
318 */
319 ip_mloopback(ifp, m, dst, hlen);
320 }
321 else {
322 IN_MULTI_UNLOCK();
323 /*
324 * If we are acting as a multicast router, perform
325 * multicast forwarding as if the packet had just
326 * arrived on the interface to which we are about
327 * to send. The multicast forwarding function
328 * recursively calls this function, using the
329 * IP_FORWARDING flag to prevent infinite recursion.
330 *
331 * Multicasts that are looped back by ip_mloopback(),
332 * above, will be forwarded by the ip_input() routine,
333 * if necessary.
334 */
335 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
336 /*
337 * If rsvp daemon is not running, do not
338 * set ip_moptions. This ensures that the packet
339 * is multicast and not just sent down one link
340 * as prescribed by rsvpd.
341 */
342 if (!rsvp_on)
343 imo = NULL;
344 if (ip_mforward &&
345 ip_mforward(ip, ifp, m, imo) != 0) {
346 m_freem(m);
347 goto done;
348 }
349 }
350 }
351
352 /*
353 * Multicasts with a time-to-live of zero may be looped-
354 * back, above, but must not be transmitted on a network.
355 * Also, multicasts addressed to the loopback interface
356 * are not sent -- the above call to ip_mloopback() will
357 * loop back a copy if this host actually belongs to the
358 * destination group on the loopback interface.
359 */
360 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
361 m_freem(m);
362 goto done;
363 }
364
365 goto sendit;
366 }
367 #ifndef notdef
368 /*
369 * If the source address is not specified yet, use the address
370 * of the outoing interface.
371 */
372 if (ip->ip_src.s_addr == INADDR_ANY) {
373 /* Interface may have no addresses. */
374 if (ia != NULL) {
375 ip->ip_src = IA_SIN(ia)->sin_addr;
376 }
377 }
378 #endif /* notdef */
379 /*
380 * Verify that we have any chance at all of being able to queue the
381 * packet or packet fragments, unless ALTQ is enabled on the given
382 * interface in which case packetdrop should be done by queueing.
383 */
384 #ifdef ALTQ
385 if ((!ALTQ_IS_ENABLED(&ifp->if_snd)) &&
386 ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
387 ifp->if_snd.ifq_maxlen))
388 #else
389 if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
390 ifp->if_snd.ifq_maxlen)
391 #endif /* ALTQ */
392 {
393 error = ENOBUFS;
394 ipstat.ips_odropped++;
395 ifp->if_snd.ifq_drops += (ip->ip_len / ifp->if_mtu + 1);
396 goto bad;
397 }
398
399 /*
400 * Look for broadcast address and
401 * verify user is allowed to send
402 * such a packet.
403 */
404 if (isbroadcast) {
405 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
406 error = EADDRNOTAVAIL;
407 goto bad;
408 }
409 if ((flags & IP_ALLOWBROADCAST) == 0) {
410 error = EACCES;
411 goto bad;
412 }
413 /* don't allow broadcast messages to be fragmented */
414 if (ip->ip_len > ifp->if_mtu) {
415 error = EMSGSIZE;
416 goto bad;
417 }
418 m->m_flags |= M_BCAST;
419 } else {
420 m->m_flags &= ~M_BCAST;
421 }
422
423 sendit:
424 #ifdef IPSEC
425 /* get SP for this packet */
426 if (inp == NULL)
427 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND,
428 flags, &error);
429 else
430 sp = ipsec4_getpolicybypcb(m, IPSEC_DIR_OUTBOUND, inp, &error);
431
432 if (sp == NULL) {
433 ipsecstat.out_inval++;
434 goto bad;
435 }
436
437 error = 0;
438
439 /* check policy */
440 switch (sp->policy) {
441 case IPSEC_POLICY_DISCARD:
442 /*
443 * This packet is just discarded.
444 */
445 ipsecstat.out_polvio++;
446 goto bad;
447
448 case IPSEC_POLICY_BYPASS:
449 case IPSEC_POLICY_NONE:
450 case IPSEC_POLICY_TCP:
451 /* no need to do IPsec. */
452 goto skip_ipsec;
453
454 case IPSEC_POLICY_IPSEC:
455 if (sp->req == NULL) {
456 /* acquire a policy */
457 error = key_spdacquire(sp);
458 goto bad;
459 }
460 break;
461
462 case IPSEC_POLICY_ENTRUST:
463 default:
464 printf("ip_output: Invalid policy found. %d\n", sp->policy);
465 }
466 {
467 struct ipsec_output_state state;
468 bzero(&state, sizeof(state));
469 state.m = m;
470 if (flags & IP_ROUTETOIF) {
471 state.ro = &iproute;
472 bzero(&iproute, sizeof(iproute));
473 } else
474 state.ro = ro;
475 state.dst = (struct sockaddr *)dst;
476
477 ip->ip_sum = 0;
478
479 /*
480 * XXX
481 * delayed checksums are not currently compatible with IPsec
482 */
483 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
484 in_delayed_cksum(m);
485 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
486 }
487
488 ip->ip_len = htons(ip->ip_len);
489 ip->ip_off = htons(ip->ip_off);
490
491 error = ipsec4_output(&state, sp, flags);
492
493 m = state.m;
494 if (flags & IP_ROUTETOIF) {
495 /*
496 * if we have tunnel mode SA, we may need to ignore
497 * IP_ROUTETOIF.
498 */
499 if (state.ro != &iproute || state.ro->ro_rt != NULL) {
500 flags &= ~IP_ROUTETOIF;
501 ro = state.ro;
502 }
503 } else
504 ro = state.ro;
505 dst = (struct sockaddr_in *)state.dst;
506 if (error) {
507 /* mbuf is already reclaimed in ipsec4_output. */
508 m = NULL;
509 switch (error) {
510 case EHOSTUNREACH:
511 case ENETUNREACH:
512 case EMSGSIZE:
513 case ENOBUFS:
514 case ENOMEM:
515 break;
516 default:
517 printf("ip4_output (ipsec): error code %d\n", error);
518 /*fall through*/
519 case ENOENT:
520 /* don't show these error codes to the user */
521 error = 0;
522 break;
523 }
524 goto bad;
525 }
526
527 /* be sure to update variables that are affected by ipsec4_output() */
528 ip = mtod(m, struct ip *);
529 hlen = ip->ip_hl << 2;
530 if (ro->ro_rt == NULL) {
531 if ((flags & IP_ROUTETOIF) == 0) {
532 printf("ip_output: "
533 "can't update route after IPsec processing\n");
534 error = EHOSTUNREACH; /*XXX*/
535 goto bad;
536 }
537 } else {
538 if (state.encap) {
539 ia = ifatoia(ro->ro_rt->rt_ifa);
540 ifp = ro->ro_rt->rt_ifp;
541 }
542 }
543 }
544
545 /* make it flipped, again. */
546 ip->ip_len = ntohs(ip->ip_len);
547 ip->ip_off = ntohs(ip->ip_off);
548 skip_ipsec:
549 #endif /*IPSEC*/
550 #ifdef FAST_IPSEC
551 /*
552 * Check the security policy (SP) for the packet and, if
553 * required, do IPsec-related processing. There are two
554 * cases here; the first time a packet is sent through
555 * it will be untagged and handled by ipsec4_checkpolicy.
556 * If the packet is resubmitted to ip_output (e.g. after
557 * AH, ESP, etc. processing), there will be a tag to bypass
558 * the lookup and related policy checking.
559 */
560 mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL);
561 s = splnet();
562 if (mtag != NULL) {
563 tdbi = (struct tdb_ident *)(mtag + 1);
564 sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND);
565 if (sp == NULL)
566 error = -EINVAL; /* force silent drop */
567 m_tag_delete(m, mtag);
568 } else {
569 sp = ipsec4_checkpolicy(m, IPSEC_DIR_OUTBOUND, flags,
570 &error, inp);
571 }
572 /*
573 * There are four return cases:
574 * sp != NULL apply IPsec policy
575 * sp == NULL, error == 0 no IPsec handling needed
576 * sp == NULL, error == -EINVAL discard packet w/o error
577 * sp == NULL, error != 0 discard packet, report error
578 */
579 if (sp != NULL) {
580 /* Loop detection, check if ipsec processing already done */
581 KASSERT(sp->req != NULL, ("ip_output: no ipsec request"));
582 for (mtag = m_tag_first(m); mtag != NULL;
583 mtag = m_tag_next(m, mtag)) {
584 if (mtag->m_tag_cookie != MTAG_ABI_COMPAT)
585 continue;
586 if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE &&
587 mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED)
588 continue;
589 /*
590 * Check if policy has an SA associated with it.
591 * This can happen when an SP has yet to acquire
592 * an SA; e.g. on first reference. If it occurs,
593 * then we let ipsec4_process_packet do its thing.
594 */
595 if (sp->req->sav == NULL)
596 break;
597 tdbi = (struct tdb_ident *)(mtag + 1);
598 if (tdbi->spi == sp->req->sav->spi &&
599 tdbi->proto == sp->req->sav->sah->saidx.proto &&
600 bcmp(&tdbi->dst, &sp->req->sav->sah->saidx.dst,
601 sizeof (union sockaddr_union)) == 0) {
602 /*
603 * No IPsec processing is needed, free
604 * reference to SP.
605 *
606 * NB: null pointer to avoid free at
607 * done: below.
608 */
609 KEY_FREESP(&sp), sp = NULL;
610 splx(s);
611 goto spd_done;
612 }
613 }
614
615 /*
616 * Do delayed checksums now because we send before
617 * this is done in the normal processing path.
618 */
619 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
620 in_delayed_cksum(m);
621 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
622 }
623
624 ip->ip_len = htons(ip->ip_len);
625 ip->ip_off = htons(ip->ip_off);
626
627 /* NB: callee frees mbuf */
628 error = ipsec4_process_packet(m, sp->req, flags, 0);
629 /*
630 * Preserve KAME behaviour: ENOENT can be returned
631 * when an SA acquire is in progress. Don't propagate
632 * this to user-level; it confuses applications.
633 *
634 * XXX this will go away when the SADB is redone.
635 */
636 if (error == ENOENT)
637 error = 0;
638 splx(s);
639 goto done;
640 } else {
641 splx(s);
642
643 if (error != 0) {
644 /*
645 * Hack: -EINVAL is used to signal that a packet
646 * should be silently discarded. This is typically
647 * because we asked key management for an SA and
648 * it was delayed (e.g. kicked up to IKE).
649 */
650 if (error == -EINVAL)
651 error = 0;
652 goto bad;
653 } else {
654 /* No IPsec processing for this packet. */
655 }
656 #ifdef notyet
657 /*
658 * If deferred crypto processing is needed, check that
659 * the interface supports it.
660 */
661 mtag = m_tag_find(m, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL);
662 if (mtag != NULL && (ifp->if_capenable & IFCAP_IPSEC) == 0) {
663 /* notify IPsec to do its own crypto */
664 ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
665 error = EHOSTUNREACH;
666 goto bad;
667 }
668 #endif
669 }
670 spd_done:
671 #endif /* FAST_IPSEC */
672
673 /* Jump over all PFIL processing if hooks are not active. */
674 if (inet_pfil_hook.ph_busy_count == -1)
675 goto passout;
676
677 /* Run through list of hooks for output packets. */
678 odst.s_addr = ip->ip_dst.s_addr;
679 error = pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT, inp);
680 if (error != 0 || m == NULL)
681 goto done;
682
683 ip = mtod(m, struct ip *);
684
685 /* See if destination IP address was changed by packet filter. */
686 if (odst.s_addr != ip->ip_dst.s_addr) {
687 m->m_flags |= M_SKIP_FIREWALL;
688 /* If destination is now ourself drop to ip_input(). */
689 if (in_localip(ip->ip_dst)) {
690 m->m_flags |= M_FASTFWD_OURS;
691 if (m->m_pkthdr.rcvif == NULL)
692 m->m_pkthdr.rcvif = loif;
693 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
694 m->m_pkthdr.csum_flags |=
695 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
696 m->m_pkthdr.csum_data = 0xffff;
697 }
698 m->m_pkthdr.csum_flags |=
699 CSUM_IP_CHECKED | CSUM_IP_VALID;
700
701 error = netisr_queue(NETISR_IP, m);
702 goto done;
703 } else
704 goto again; /* Redo the routing table lookup. */
705 }
706
707 #ifdef IPFIREWALL_FORWARD
708 /* See if local, if yes, send it to netisr with IP_FASTFWD_OURS. */
709 if (m->m_flags & M_FASTFWD_OURS) {
710 if (m->m_pkthdr.rcvif == NULL)
711 m->m_pkthdr.rcvif = loif;
712 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
713 m->m_pkthdr.csum_flags |=
714 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
715 m->m_pkthdr.csum_data = 0xffff;
716 }
717 m->m_pkthdr.csum_flags |=
718 CSUM_IP_CHECKED | CSUM_IP_VALID;
719
720 error = netisr_queue(NETISR_IP, m);
721 goto done;
722 }
723 /* Or forward to some other address? */
724 fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
725 if (fwd_tag) {
726 dst = (struct sockaddr_in *)&ro->ro_dst;
727 bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in));
728 m->m_flags |= M_SKIP_FIREWALL;
729 m_tag_delete(m, fwd_tag);
730 goto again;
731 }
732 #endif /* IPFIREWALL_FORWARD */
733
734 passout:
735 /* 127/8 must not appear on wire - RFC1122. */
736 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
737 (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
738 if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
739 ipstat.ips_badaddr++;
740 error = EADDRNOTAVAIL;
741 goto bad;
742 }
743 }
744
745 m->m_pkthdr.csum_flags |= CSUM_IP;
746 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist;
747 if (sw_csum & CSUM_DELAY_DATA) {
748 in_delayed_cksum(m);
749 sw_csum &= ~CSUM_DELAY_DATA;
750 }
751 m->m_pkthdr.csum_flags &= ifp->if_hwassist;
752
753 /*
754 * If small enough for interface, or the interface will take
755 * care of the fragmentation for us, can just send directly.
756 */
757 if (ip->ip_len <= ifp->if_mtu || (ifp->if_hwassist & CSUM_FRAGMENT &&
758 ((ip->ip_off & IP_DF) == 0))) {
759 ip->ip_len = htons(ip->ip_len);
760 ip->ip_off = htons(ip->ip_off);
761 ip->ip_sum = 0;
762 if (sw_csum & CSUM_DELAY_IP)
763 ip->ip_sum = in_cksum(m, hlen);
764
765 /* Record statistics for this interface address. */
766 if (!(flags & IP_FORWARDING) && ia) {
767 ia->ia_ifa.if_opackets++;
768 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
769 }
770
771 #ifdef IPSEC
772 /* clean ipsec history once it goes out of the node */
773 ipsec_delaux(m);
774 #endif
775
776 #ifdef MBUF_STRESS_TEST
777 if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size)
778 m = m_fragment(m, M_DONTWAIT, mbuf_frag_size);
779 #endif
780 error = (*ifp->if_output)(ifp, m,
781 (struct sockaddr *)dst, ro->ro_rt);
782 goto done;
783 }
784
785 if (ip->ip_off & IP_DF) {
786 error = EMSGSIZE;
787 /*
788 * This case can happen if the user changed the MTU
789 * of an interface after enabling IP on it. Because
790 * most netifs don't keep track of routes pointing to
791 * them, there is no way for one to update all its
792 * routes when the MTU is changed.
793 */
794 if (ro != NULL &&
795 (ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) &&
796 (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
797 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
798 }
799 ipstat.ips_cantfrag++;
800 goto bad;
801 }
802
803 /*
804 * Too large for interface; fragment if possible. If successful,
805 * on return, m will point to a list of packets to be sent.
806 */
807 error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist, sw_csum);
808 if (error)
809 goto bad;
810 for (; m; m = m0) {
811 m0 = m->m_nextpkt;
812 m->m_nextpkt = 0;
813 #ifdef IPSEC
814 /* clean ipsec history once it goes out of the node */
815 ipsec_delaux(m);
816 #endif
817 if (error == 0) {
818 /* Record statistics for this interface address. */
819 if (ia != NULL) {
820 ia->ia_ifa.if_opackets++;
821 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
822 }
823
824 error = (*ifp->if_output)(ifp, m,
825 (struct sockaddr *)dst, ro->ro_rt);
826 } else
827 m_freem(m);
828 }
829
830 if (error == 0)
831 ipstat.ips_fragmented++;
832
833 done:
834 if (ro == &iproute && ro->ro_rt) {
835 RTFREE(ro->ro_rt);
836 }
837 #ifdef IPSEC
838 if (sp != NULL) {
839 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
840 printf("DP ip_output call free SP:%p\n", sp));
841 key_freesp(sp);
842 }
843 #endif
844 #ifdef FAST_IPSEC
845 if (sp != NULL)
846 KEY_FREESP(&sp);
847 #endif
848 return (error);
849 bad:
850 m_freem(m);
851 goto done;
852 }
853
854 /*
855 * Create a chain of fragments which fit the given mtu. m_frag points to the
856 * mbuf to be fragmented; on return it points to the chain with the fragments.
857 * Return 0 if no error. If error, m_frag may contain a partially built
858 * chain of fragments that should be freed by the caller.
859 *
860 * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist)
861 * sw_csum contains the delayed checksums flags (e.g., CSUM_DELAY_IP).
862 */
863 int
864 ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
865 u_long if_hwassist_flags, int sw_csum)
866 {
867 int error = 0;
868 int hlen = ip->ip_hl << 2;
869 int len = (mtu - hlen) & ~7; /* size of payload in each fragment */
870 int off;
871 struct mbuf *m0 = *m_frag; /* the original packet */
872 int firstlen;
873 struct mbuf **mnext;
874 int nfrags;
875
876 if (ip->ip_off & IP_DF) { /* Fragmentation not allowed */
877 ipstat.ips_cantfrag++;
878 return EMSGSIZE;
879 }
880
881 /*
882 * Must be able to put at least 8 bytes per fragment.
883 */
884 if (len < 8)
885 return EMSGSIZE;
886
887 /*
888 * If the interface will not calculate checksums on
889 * fragmented packets, then do it here.
890 */
891 if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
892 (if_hwassist_flags & CSUM_IP_FRAGS) == 0) {
893 in_delayed_cksum(m0);
894 m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
895 }
896
897 if (len > PAGE_SIZE) {
898 /*
899 * Fragment large datagrams such that each segment
900 * contains a multiple of PAGE_SIZE amount of data,
901 * plus headers. This enables a receiver to perform
902 * page-flipping zero-copy optimizations.
903 *
904 * XXX When does this help given that sender and receiver
905 * could have different page sizes, and also mtu could
906 * be less than the receiver's page size ?
907 */
908 int newlen;
909 struct mbuf *m;
910
911 for (m = m0, off = 0; m && (off+m->m_len) <= mtu; m = m->m_next)
912 off += m->m_len;
913
914 /*
915 * firstlen (off - hlen) must be aligned on an
916 * 8-byte boundary
917 */
918 if (off < hlen)
919 goto smart_frag_failure;
920 off = ((off - hlen) & ~7) + hlen;
921 newlen = (~PAGE_MASK) & mtu;
922 if ((newlen + sizeof (struct ip)) > mtu) {
923 /* we failed, go back the default */
924 smart_frag_failure:
925 newlen = len;
926 off = hlen + len;
927 }
928 len = newlen;
929
930 } else {
931 off = hlen + len;
932 }
933
934 firstlen = off - hlen;
935 mnext = &m0->m_nextpkt; /* pointer to next packet */
936
937 /*
938 * Loop through length of segment after first fragment,
939 * make new header and copy data of each part and link onto chain.
940 * Here, m0 is the original packet, m is the fragment being created.
941 * The fragments are linked off the m_nextpkt of the original
942 * packet, which after processing serves as the first fragment.
943 */
944 for (nfrags = 1; off < ip->ip_len; off += len, nfrags++) {
945 struct ip *mhip; /* ip header on the fragment */
946 struct mbuf *m;
947 int mhlen = sizeof (struct ip);
948
949 MGETHDR(m, M_DONTWAIT, MT_HEADER);
950 if (m == NULL) {
951 error = ENOBUFS;
952 ipstat.ips_odropped++;
953 goto done;
954 }
955 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
956 /*
957 * In the first mbuf, leave room for the link header, then
958 * copy the original IP header including options. The payload
959 * goes into an additional mbuf chain returned by m_copy().
960 */
961 m->m_data += max_linkhdr;
962 mhip = mtod(m, struct ip *);
963 *mhip = *ip;
964 if (hlen > sizeof (struct ip)) {
965 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
966 mhip->ip_v = IPVERSION;
967 mhip->ip_hl = mhlen >> 2;
968 }
969 m->m_len = mhlen;
970 /* XXX do we need to add ip->ip_off below ? */
971 mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off;
972 if (off + len >= ip->ip_len) { /* last fragment */
973 len = ip->ip_len - off;
974 m->m_flags |= M_LASTFRAG;
975 } else
976 mhip->ip_off |= IP_MF;
977 mhip->ip_len = htons((u_short)(len + mhlen));
978 m->m_next = m_copy(m0, off, len);
979 if (m->m_next == NULL) { /* copy failed */
980 m_free(m);
981 error = ENOBUFS; /* ??? */
982 ipstat.ips_odropped++;
983 goto done;
984 }
985 m->m_pkthdr.len = mhlen + len;
986 m->m_pkthdr.rcvif = NULL;
987 #ifdef MAC
988 mac_create_fragment(m0, m);
989 #endif
990 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
991 mhip->ip_off = htons(mhip->ip_off);
992 mhip->ip_sum = 0;
993 if (sw_csum & CSUM_DELAY_IP)
994 mhip->ip_sum = in_cksum(m, mhlen);
995 *mnext = m;
996 mnext = &m->m_nextpkt;
997 }
998 ipstat.ips_ofragments += nfrags;
999
1000 /* set first marker for fragment chain */
1001 m0->m_flags |= M_FIRSTFRAG | M_FRAG;
1002 m0->m_pkthdr.csum_data = nfrags;
1003
1004 /*
1005 * Update first fragment by trimming what's been copied out
1006 * and updating header.
1007 */
1008 m_adj(m0, hlen + firstlen - ip->ip_len);
1009 m0->m_pkthdr.len = hlen + firstlen;
1010 ip->ip_len = htons((u_short)m0->m_pkthdr.len);
1011 ip->ip_off |= IP_MF;
1012 ip->ip_off = htons(ip->ip_off);
1013 ip->ip_sum = 0;
1014 if (sw_csum & CSUM_DELAY_IP)
1015 ip->ip_sum = in_cksum(m0, hlen);
1016
1017 done:
1018 *m_frag = m0;
1019 return error;
1020 }
1021
1022 void
1023 in_delayed_cksum(struct mbuf *m)
1024 {
1025 struct ip *ip;
1026 u_short csum, offset;
1027
1028 ip = mtod(m, struct ip *);
1029 offset = ip->ip_hl << 2 ;
1030 csum = in_cksum_skip(m, ip->ip_len, offset);
1031 if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
1032 csum = 0xffff;
1033 offset += m->m_pkthdr.csum_data; /* checksum offset */
1034
1035 if (offset + sizeof(u_short) > m->m_len) {
1036 printf("delayed m_pullup, m->len: %d off: %d p: %d\n",
1037 m->m_len, offset, ip->ip_p);
1038 /*
1039 * XXX
1040 * this shouldn't happen, but if it does, the
1041 * correct behavior may be to insert the checksum
1042 * in the appropriate next mbuf in the chain.
1043 */
1044 return;
1045 }
1046 *(u_short *)(m->m_data + offset) = csum;
1047 }
1048
1049 /*
1050 * Insert IP options into preformed packet.
1051 * Adjust IP destination as required for IP source routing,
1052 * as indicated by a non-zero in_addr at the start of the options.
1053 *
1054 * XXX This routine assumes that the packet has no options in place.
1055 */
1056 static struct mbuf *
1057 ip_insertoptions(m, opt, phlen)
1058 register struct mbuf *m;
1059 struct mbuf *opt;
1060 int *phlen;
1061 {
1062 register struct ipoption *p = mtod(opt, struct ipoption *);
1063 struct mbuf *n;
1064 register struct ip *ip = mtod(m, struct ip *);
1065 unsigned optlen;
1066
1067 optlen = opt->m_len - sizeof(p->ipopt_dst);
1068 if (optlen + ip->ip_len > IP_MAXPACKET) {
1069 *phlen = 0;
1070 return (m); /* XXX should fail */
1071 }
1072 if (p->ipopt_dst.s_addr)
1073 ip->ip_dst = p->ipopt_dst;
1074 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
1075 MGETHDR(n, M_DONTWAIT, MT_HEADER);
1076 if (n == NULL) {
1077 *phlen = 0;
1078 return (m);
1079 }
1080 M_MOVE_PKTHDR(n, m);
1081 n->m_pkthdr.rcvif = NULL;
1082 #ifdef MAC
1083 mac_copy_mbuf(m, n);
1084 #endif
1085 n->m_pkthdr.len += optlen;
1086 m->m_len -= sizeof(struct ip);
1087 m->m_data += sizeof(struct ip);
1088 n->m_next = m;
1089 m = n;
1090 m->m_len = optlen + sizeof(struct ip);
1091 m->m_data += max_linkhdr;
1092 bcopy(ip, mtod(m, void *), sizeof(struct ip));
1093 } else {
1094 m->m_data -= optlen;
1095 m->m_len += optlen;
1096 m->m_pkthdr.len += optlen;
1097 bcopy(ip, mtod(m, void *), sizeof(struct ip));
1098 }
1099 ip = mtod(m, struct ip *);
1100 bcopy(p->ipopt_list, ip + 1, optlen);
1101 *phlen = sizeof(struct ip) + optlen;
1102 ip->ip_v = IPVERSION;
1103 ip->ip_hl = *phlen >> 2;
1104 ip->ip_len += optlen;
1105 return (m);
1106 }
1107
1108 /*
1109 * Copy options from ip to jp,
1110 * omitting those not copied during fragmentation.
1111 */
1112 int
1113 ip_optcopy(ip, jp)
1114 struct ip *ip, *jp;
1115 {
1116 register u_char *cp, *dp;
1117 int opt, optlen, cnt;
1118
1119 cp = (u_char *)(ip + 1);
1120 dp = (u_char *)(jp + 1);
1121 cnt = (ip->ip_hl << 2) - sizeof (struct ip);
1122 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1123 opt = cp[0];
1124 if (opt == IPOPT_EOL)
1125 break;
1126 if (opt == IPOPT_NOP) {
1127 /* Preserve for IP mcast tunnel's LSRR alignment. */
1128 *dp++ = IPOPT_NOP;
1129 optlen = 1;
1130 continue;
1131 }
1132
1133 KASSERT(cnt >= IPOPT_OLEN + sizeof(*cp),
1134 ("ip_optcopy: malformed ipv4 option"));
1135 optlen = cp[IPOPT_OLEN];
1136 KASSERT(optlen >= IPOPT_OLEN + sizeof(*cp) && optlen <= cnt,
1137 ("ip_optcopy: malformed ipv4 option"));
1138
1139 /* bogus lengths should have been caught by ip_dooptions */
1140 if (optlen > cnt)
1141 optlen = cnt;
1142 if (IPOPT_COPIED(opt)) {
1143 bcopy(cp, dp, optlen);
1144 dp += optlen;
1145 }
1146 }
1147 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
1148 *dp++ = IPOPT_EOL;
1149 return (optlen);
1150 }
1151
1152 /*
1153 * IP socket option processing.
1154 *
1155 * There are two versions of this call in order to work around a race
1156 * condition in TCP in FreeBSD 6.x. In the TCP implementation, so->so_pcb
1157 * can become NULL if the pcb or pcbinfo lock isn't held. However, when
1158 * entering ip_ctloutput(), neither lock is held, and finding the pointer to
1159 * either lock requires follow so->so_pcb, which may be NULL.
1160 * ip_ctloutput_pcbinfo() accepts the pcbinfo pointer so that the lock can be
1161 * safely acquired. This is not required in FreeBSD 7.x because the
1162 * invariants on so->so_pcb are much stronger, so it cannot become NULL
1163 * while the socket is in use.
1164 */
1165 int
1166 ip_ctloutput_pcbinfo(so, sopt, pcbinfo)
1167 struct socket *so;
1168 struct sockopt *sopt;
1169 struct inpcbinfo *pcbinfo;
1170 {
1171 struct inpcb *inp = sotoinpcb(so);
1172 int error, optval;
1173
1174 if (pcbinfo == NULL)
1175 pcbinfo = inp->inp_pcbinfo;
1176
1177 error = optval = 0;
1178 if (sopt->sopt_level != IPPROTO_IP) {
1179 return (EINVAL);
1180 }
1181
1182 if (inp == NULL)
1183 return (EINVAL);
1184
1185 switch (sopt->sopt_dir) {
1186 case SOPT_SET:
1187 switch (sopt->sopt_name) {
1188 case IP_OPTIONS:
1189 #ifdef notyet
1190 case IP_RETOPTS:
1191 #endif
1192 {
1193 struct mbuf *m;
1194 if (sopt->sopt_valsize > MLEN) {
1195 error = EMSGSIZE;
1196 break;
1197 }
1198 MGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT, MT_DATA);
1199 if (m == NULL) {
1200 error = ENOBUFS;
1201 break;
1202 }
1203 m->m_len = sopt->sopt_valsize;
1204 error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
1205 m->m_len);
1206 if (error) {
1207 m_free(m);
1208 break;
1209 }
1210 INP_INFO_WLOCK(pcbinfo);
1211 if (so->so_pcb == NULL) {
1212 INP_INFO_WUNLOCK(pcbinfo);
1213 m_free(m);
1214 error = EINVAL;
1215 break;
1216 }
1217 INP_LOCK(inp);
1218 INP_INFO_WUNLOCK(pcbinfo);
1219 error = ip_pcbopts(inp, sopt->sopt_name, m);
1220 INP_UNLOCK(inp);
1221 return (error);
1222 }
1223
1224 case IP_TOS:
1225 case IP_TTL:
1226 case IP_MINTTL:
1227 case IP_RECVOPTS:
1228 case IP_RECVRETOPTS:
1229 case IP_RECVDSTADDR:
1230 case IP_RECVTTL:
1231 case IP_RECVIF:
1232 case IP_FAITH:
1233 case IP_ONESBCAST:
1234 case IP_DONTFRAG:
1235 error = sooptcopyin(sopt, &optval, sizeof optval,
1236 sizeof optval);
1237 if (error)
1238 break;
1239
1240 INP_INFO_WLOCK(pcbinfo);
1241 if (so->so_pcb == NULL) {
1242 INP_INFO_WUNLOCK(pcbinfo);
1243 error = EINVAL;
1244 break;
1245 }
1246 INP_LOCK(inp);
1247 INP_INFO_WUNLOCK(pcbinfo);
1248 switch (sopt->sopt_name) {
1249 case IP_TOS:
1250 inp->inp_ip_tos = optval;
1251 break;
1252
1253 case IP_TTL:
1254 inp->inp_ip_ttl = optval;
1255 break;
1256
1257 case IP_MINTTL:
1258 if (optval >= 0 && optval <= MAXTTL)
1259 inp->inp_ip_minttl = optval;
1260 else
1261 error = EINVAL;
1262 break;
1263
1264 #define OPTSET(bit) do { \
1265 INP_LOCK(inp); \
1266 if (optval) \
1267 inp->inp_flags |= bit; \
1268 else \
1269 inp->inp_flags &= ~bit; \
1270 INP_UNLOCK(inp); \
1271 } while (0)
1272
1273 case IP_RECVOPTS:
1274 OPTSET(INP_RECVOPTS);
1275 break;
1276
1277 case IP_RECVRETOPTS:
1278 OPTSET(INP_RECVRETOPTS);
1279 break;
1280
1281 case IP_RECVDSTADDR:
1282 OPTSET(INP_RECVDSTADDR);
1283 break;
1284
1285 case IP_RECVTTL:
1286 OPTSET(INP_RECVTTL);
1287 break;
1288
1289 case IP_RECVIF:
1290 OPTSET(INP_RECVIF);
1291 break;
1292
1293 case IP_FAITH:
1294 OPTSET(INP_FAITH);
1295 break;
1296
1297 case IP_ONESBCAST:
1298 OPTSET(INP_ONESBCAST);
1299 break;
1300 case IP_DONTFRAG:
1301 OPTSET(INP_DONTFRAG);
1302 break;
1303 }
1304 INP_UNLOCK(inp);
1305 break;
1306 #undef OPTSET
1307
1308 case IP_MULTICAST_IF:
1309 case IP_MULTICAST_VIF:
1310 case IP_MULTICAST_TTL:
1311 case IP_MULTICAST_LOOP:
1312 case IP_ADD_MEMBERSHIP:
1313 case IP_DROP_MEMBERSHIP:
1314 error = ip_setmoptions(inp, sopt);
1315 break;
1316
1317 case IP_PORTRANGE:
1318 error = sooptcopyin(sopt, &optval, sizeof optval,
1319 sizeof optval);
1320 if (error)
1321 break;
1322
1323 INP_INFO_WLOCK(pcbinfo);
1324 if (so->so_pcb == NULL) {
1325 INP_INFO_WUNLOCK(pcbinfo);
1326 error = EINVAL;
1327 break;
1328 }
1329 INP_LOCK(inp);
1330 INP_INFO_WUNLOCK(pcbinfo);
1331 switch (optval) {
1332 case IP_PORTRANGE_DEFAULT:
1333 inp->inp_flags &= ~(INP_LOWPORT);
1334 inp->inp_flags &= ~(INP_HIGHPORT);
1335 break;
1336
1337 case IP_PORTRANGE_HIGH:
1338 inp->inp_flags &= ~(INP_LOWPORT);
1339 inp->inp_flags |= INP_HIGHPORT;
1340 break;
1341
1342 case IP_PORTRANGE_LOW:
1343 inp->inp_flags &= ~(INP_HIGHPORT);
1344 inp->inp_flags |= INP_LOWPORT;
1345 break;
1346
1347 default:
1348 error = EINVAL;
1349 break;
1350 }
1351 INP_UNLOCK(inp);
1352 break;
1353
1354 #if defined(IPSEC) || defined(FAST_IPSEC)
1355 case IP_IPSEC_POLICY:
1356 {
1357 caddr_t req;
1358 size_t len = 0;
1359 int priv;
1360 struct mbuf *m;
1361 int optname;
1362
1363 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1364 break;
1365 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1366 break;
1367 priv = (sopt->sopt_td != NULL &&
1368 suser(sopt->sopt_td) != 0) ? 0 : 1;
1369 req = mtod(m, caddr_t);
1370 len = m->m_len;
1371 optname = sopt->sopt_name;
1372 INP_INFO_WLOCK(pcbinfo);
1373 if (so->so_pcb == NULL) {
1374 INP_INFO_WUNLOCK(pcbinfo);
1375 m_free(m);
1376 error = EINVAL;
1377 break;
1378 }
1379 INP_LOCK(inp);
1380 INP_INFO_WUNLOCK(pcbinfo);
1381 error = ipsec4_set_policy(inp, optname, req, len, priv);
1382 INP_UNLOCK(inp);
1383 m_freem(m);
1384 break;
1385 }
1386 #endif /*IPSEC*/
1387
1388 default:
1389 error = ENOPROTOOPT;
1390 break;
1391 }
1392 break;
1393
1394 case SOPT_GET:
1395 switch (sopt->sopt_name) {
1396 case IP_OPTIONS:
1397 case IP_RETOPTS:
1398 if (inp->inp_options)
1399 error = sooptcopyout(sopt,
1400 mtod(inp->inp_options,
1401 char *),
1402 inp->inp_options->m_len);
1403 else
1404 sopt->sopt_valsize = 0;
1405 break;
1406
1407 case IP_TOS:
1408 case IP_TTL:
1409 case IP_MINTTL:
1410 case IP_RECVOPTS:
1411 case IP_RECVRETOPTS:
1412 case IP_RECVDSTADDR:
1413 case IP_RECVTTL:
1414 case IP_RECVIF:
1415 case IP_PORTRANGE:
1416 case IP_FAITH:
1417 case IP_ONESBCAST:
1418 case IP_DONTFRAG:
1419 switch (sopt->sopt_name) {
1420
1421 case IP_TOS:
1422 optval = inp->inp_ip_tos;
1423 break;
1424
1425 case IP_TTL:
1426 optval = inp->inp_ip_ttl;
1427 break;
1428
1429 case IP_MINTTL:
1430 optval = inp->inp_ip_minttl;
1431 break;
1432
1433 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
1434
1435 case IP_RECVOPTS:
1436 optval = OPTBIT(INP_RECVOPTS);
1437 break;
1438
1439 case IP_RECVRETOPTS:
1440 optval = OPTBIT(INP_RECVRETOPTS);
1441 break;
1442
1443 case IP_RECVDSTADDR:
1444 optval = OPTBIT(INP_RECVDSTADDR);
1445 break;
1446
1447 case IP_RECVTTL:
1448 optval = OPTBIT(INP_RECVTTL);
1449 break;
1450
1451 case IP_RECVIF:
1452 optval = OPTBIT(INP_RECVIF);
1453 break;
1454
1455 case IP_PORTRANGE:
1456 if (inp->inp_flags & INP_HIGHPORT)
1457 optval = IP_PORTRANGE_HIGH;
1458 else if (inp->inp_flags & INP_LOWPORT)
1459 optval = IP_PORTRANGE_LOW;
1460 else
1461 optval = 0;
1462 break;
1463
1464 case IP_FAITH:
1465 optval = OPTBIT(INP_FAITH);
1466 break;
1467
1468 case IP_ONESBCAST:
1469 optval = OPTBIT(INP_ONESBCAST);
1470 break;
1471 case IP_DONTFRAG:
1472 optval = OPTBIT(INP_DONTFRAG);
1473 break;
1474 }
1475 error = sooptcopyout(sopt, &optval, sizeof optval);
1476 break;
1477
1478 case IP_MULTICAST_IF:
1479 case IP_MULTICAST_VIF:
1480 case IP_MULTICAST_TTL:
1481 case IP_MULTICAST_LOOP:
1482 case IP_ADD_MEMBERSHIP:
1483 case IP_DROP_MEMBERSHIP:
1484 error = ip_getmoptions(inp, sopt);
1485 break;
1486
1487 #if defined(IPSEC) || defined(FAST_IPSEC)
1488 case IP_IPSEC_POLICY:
1489 {
1490 struct mbuf *m = NULL;
1491 caddr_t req = NULL;
1492 size_t len = 0;
1493
1494 if (m != 0) {
1495 req = mtod(m, caddr_t);
1496 len = m->m_len;
1497 }
1498 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m);
1499 if (error == 0)
1500 error = soopt_mcopyout(sopt, m); /* XXX */
1501 if (error == 0)
1502 m_freem(m);
1503 break;
1504 }
1505 #endif /*IPSEC*/
1506
1507 default:
1508 error = ENOPROTOOPT;
1509 break;
1510 }
1511 break;
1512 }
1513 return (error);
1514 }
1515
1516 int
1517 ip_ctloutput(so, sopt)
1518 struct socket *so;
1519 struct sockopt *sopt;
1520 {
1521
1522 return (ip_ctloutput_pcbinfo(so, sopt, NULL));
1523 }
1524
1525 /*
1526 * Set up IP options in pcb for insertion in output packets.
1527 * Store in mbuf with pointer in pcbopt, adding pseudo-option
1528 * with destination address if source routed.
1529 */
1530 static int
1531 ip_pcbopts(struct inpcb *inp, int optname, struct mbuf *m)
1532 {
1533 register int cnt, optlen;
1534 register u_char *cp;
1535 struct mbuf **pcbopt;
1536 u_char opt;
1537
1538 INP_LOCK_ASSERT(inp);
1539
1540 pcbopt = &inp->inp_options;
1541
1542 /* turn off any old options */
1543 if (*pcbopt)
1544 (void)m_free(*pcbopt);
1545 *pcbopt = 0;
1546 if (m == NULL || m->m_len == 0) {
1547 /*
1548 * Only turning off any previous options.
1549 */
1550 if (m != NULL)
1551 (void)m_free(m);
1552 return (0);
1553 }
1554
1555 if (m->m_len % sizeof(int32_t))
1556 goto bad;
1557 /*
1558 * IP first-hop destination address will be stored before
1559 * actual options; move other options back
1560 * and clear it when none present.
1561 */
1562 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
1563 goto bad;
1564 cnt = m->m_len;
1565 m->m_len += sizeof(struct in_addr);
1566 cp = mtod(m, u_char *) + sizeof(struct in_addr);
1567 bcopy(mtod(m, void *), cp, (unsigned)cnt);
1568 bzero(mtod(m, void *), sizeof(struct in_addr));
1569
1570 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1571 opt = cp[IPOPT_OPTVAL];
1572 if (opt == IPOPT_EOL)
1573 break;
1574 if (opt == IPOPT_NOP)
1575 optlen = 1;
1576 else {
1577 if (cnt < IPOPT_OLEN + sizeof(*cp))
1578 goto bad;
1579 optlen = cp[IPOPT_OLEN];
1580 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
1581 goto bad;
1582 }
1583 switch (opt) {
1584
1585 default:
1586 break;
1587
1588 case IPOPT_LSRR:
1589 case IPOPT_SSRR:
1590 /*
1591 * user process specifies route as:
1592 * ->A->B->C->D
1593 * D must be our final destination (but we can't
1594 * check that since we may not have connected yet).
1595 * A is first hop destination, which doesn't appear in
1596 * actual IP option, but is stored before the options.
1597 */
1598 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
1599 goto bad;
1600 m->m_len -= sizeof(struct in_addr);
1601 cnt -= sizeof(struct in_addr);
1602 optlen -= sizeof(struct in_addr);
1603 cp[IPOPT_OLEN] = optlen;
1604 /*
1605 * Move first hop before start of options.
1606 */
1607 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
1608 sizeof(struct in_addr));
1609 /*
1610 * Then copy rest of options back
1611 * to close up the deleted entry.
1612 */
1613 bcopy((&cp[IPOPT_OFFSET+1] + sizeof(struct in_addr)),
1614 &cp[IPOPT_OFFSET+1],
1615 (unsigned)cnt - (IPOPT_MINOFF - 1));
1616 break;
1617 }
1618 }
1619 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
1620 goto bad;
1621 *pcbopt = m;
1622 return (0);
1623
1624 bad:
1625 (void)m_free(m);
1626 return (EINVAL);
1627 }
1628
1629 /*
1630 * XXX
1631 * The whole multicast option thing needs to be re-thought.
1632 * Several of these options are equally applicable to non-multicast
1633 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
1634 * standard option (IP_TTL).
1635 */
1636
1637 /*
1638 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
1639 */
1640 static struct ifnet *
1641 ip_multicast_if(a, ifindexp)
1642 struct in_addr *a;
1643 int *ifindexp;
1644 {
1645 int ifindex;
1646 struct ifnet *ifp;
1647
1648 if (ifindexp)
1649 *ifindexp = 0;
1650 if (ntohl(a->s_addr) >> 24 == 0) {
1651 ifindex = ntohl(a->s_addr) & 0xffffff;
1652 if (ifindex < 0 || if_index < ifindex)
1653 return NULL;
1654 ifp = ifnet_byindex(ifindex);
1655 if (ifindexp)
1656 *ifindexp = ifindex;
1657 } else {
1658 INADDR_TO_IFP(*a, ifp);
1659 }
1660 return ifp;
1661 }
1662
1663 /*
1664 * Given an inpcb, return its multicast options structure pointer. Accepts
1665 * an unlocked inpcb pointer, but will return it locked. May sleep.
1666 */
1667 static struct ip_moptions *
1668 ip_findmoptions(struct inpcb *inp)
1669 {
1670 struct ip_moptions *imo;
1671
1672 INP_LOCK(inp);
1673 if (inp->inp_moptions != NULL)
1674 return (inp->inp_moptions);
1675
1676 INP_UNLOCK(inp);
1677
1678 imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK);
1679
1680 imo->imo_multicast_ifp = NULL;
1681 imo->imo_multicast_addr.s_addr = INADDR_ANY;
1682 imo->imo_multicast_vif = -1;
1683 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1684 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1685 imo->imo_num_memberships = 0;
1686
1687 INP_LOCK(inp);
1688 if (inp->inp_moptions != NULL) {
1689 free(imo, M_IPMOPTS);
1690 return (inp->inp_moptions);
1691 }
1692 inp->inp_moptions = imo;
1693 return (imo);
1694 }
1695
1696 /*
1697 * Set the IP multicast options in response to user setsockopt().
1698 */
1699 static int
1700 ip_setmoptions(struct inpcb *inp, struct sockopt *sopt)
1701 {
1702 int error = 0;
1703 int i;
1704 struct in_addr addr;
1705 struct ip_mreq mreq;
1706 struct ifnet *ifp;
1707 struct ip_moptions *imo;
1708 struct route ro;
1709 struct sockaddr_in *dst;
1710 int ifindex;
1711 int s;
1712
1713 /*
1714 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
1715 * or is a divert socket, reject it.
1716 * XXX Unlocked read of inp_socket believed OK.
1717 */
1718 if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
1719 (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
1720 inp->inp_socket->so_proto->pr_type != SOCK_DGRAM))
1721 return (EOPNOTSUPP);
1722
1723 switch (sopt->sopt_name) {
1724 /* store an index number for the vif you wanna use in the send */
1725 case IP_MULTICAST_VIF:
1726 if (legal_vif_num == 0) {
1727 error = EOPNOTSUPP;
1728 break;
1729 }
1730 error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
1731 if (error)
1732 break;
1733 if (!legal_vif_num(i) && (i != -1)) {
1734 error = EINVAL;
1735 break;
1736 }
1737 imo = ip_findmoptions(inp);
1738 imo->imo_multicast_vif = i;
1739 INP_UNLOCK(inp);
1740 break;
1741
1742 case IP_MULTICAST_IF:
1743 /*
1744 * Select the interface for outgoing multicast packets.
1745 */
1746 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
1747 if (error)
1748 break;
1749 /*
1750 * INADDR_ANY is used to remove a previous selection.
1751 * When no interface is selected, a default one is
1752 * chosen every time a multicast packet is sent.
1753 */
1754 imo = ip_findmoptions(inp);
1755 if (addr.s_addr == INADDR_ANY) {
1756 imo->imo_multicast_ifp = NULL;
1757 INP_UNLOCK(inp);
1758 break;
1759 }
1760 /*
1761 * The selected interface is identified by its local
1762 * IP address. Find the interface and confirm that
1763 * it supports multicasting.
1764 */
1765 s = splimp();
1766 ifp = ip_multicast_if(&addr, &ifindex);
1767 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1768 INP_UNLOCK(inp);
1769 splx(s);
1770 error = EADDRNOTAVAIL;
1771 break;
1772 }
1773 imo->imo_multicast_ifp = ifp;
1774 if (ifindex)
1775 imo->imo_multicast_addr = addr;
1776 else
1777 imo->imo_multicast_addr.s_addr = INADDR_ANY;
1778 INP_UNLOCK(inp);
1779 splx(s);
1780 break;
1781
1782 case IP_MULTICAST_TTL:
1783 /*
1784 * Set the IP time-to-live for outgoing multicast packets.
1785 * The original multicast API required a char argument,
1786 * which is inconsistent with the rest of the socket API.
1787 * We allow either a char or an int.
1788 */
1789 if (sopt->sopt_valsize == 1) {
1790 u_char ttl;
1791 error = sooptcopyin(sopt, &ttl, 1, 1);
1792 if (error)
1793 break;
1794 imo = ip_findmoptions(inp);
1795 imo->imo_multicast_ttl = ttl;
1796 INP_UNLOCK(inp);
1797 } else {
1798 u_int ttl;
1799 error = sooptcopyin(sopt, &ttl, sizeof ttl,
1800 sizeof ttl);
1801 if (error)
1802 break;
1803 if (ttl > 255)
1804 error = EINVAL;
1805 else {
1806 imo = ip_findmoptions(inp);
1807 imo->imo_multicast_ttl = ttl;
1808 INP_UNLOCK(inp);
1809 }
1810 }
1811 break;
1812
1813 case IP_MULTICAST_LOOP:
1814 /*
1815 * Set the loopback flag for outgoing multicast packets.
1816 * Must be zero or one. The original multicast API required a
1817 * char argument, which is inconsistent with the rest
1818 * of the socket API. We allow either a char or an int.
1819 */
1820 if (sopt->sopt_valsize == 1) {
1821 u_char loop;
1822 error = sooptcopyin(sopt, &loop, 1, 1);
1823 if (error)
1824 break;
1825 imo = ip_findmoptions(inp);
1826 imo->imo_multicast_loop = !!loop;
1827 INP_UNLOCK(inp);
1828 } else {
1829 u_int loop;
1830 error = sooptcopyin(sopt, &loop, sizeof loop,
1831 sizeof loop);
1832 if (error)
1833 break;
1834 imo = ip_findmoptions(inp);
1835 imo->imo_multicast_loop = !!loop;
1836 INP_UNLOCK(inp);
1837 }
1838 break;
1839
1840 case IP_ADD_MEMBERSHIP:
1841 /*
1842 * Add a multicast group membership.
1843 * Group must be a valid IP multicast address.
1844 */
1845 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1846 if (error)
1847 break;
1848
1849 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1850 error = EINVAL;
1851 break;
1852 }
1853 s = splimp();
1854 /*
1855 * If no interface address was provided, use the interface of
1856 * the route to the given multicast address.
1857 */
1858 if (mreq.imr_interface.s_addr == INADDR_ANY) {
1859 bzero((caddr_t)&ro, sizeof(ro));
1860 dst = (struct sockaddr_in *)&ro.ro_dst;
1861 dst->sin_len = sizeof(*dst);
1862 dst->sin_family = AF_INET;
1863 dst->sin_addr = mreq.imr_multiaddr;
1864 rtalloc_ign(&ro, RTF_CLONING);
1865 if (ro.ro_rt == NULL) {
1866 error = EADDRNOTAVAIL;
1867 splx(s);
1868 break;
1869 }
1870 ifp = ro.ro_rt->rt_ifp;
1871 RTFREE(ro.ro_rt);
1872 }
1873 else {
1874 ifp = ip_multicast_if(&mreq.imr_interface, NULL);
1875 }
1876
1877 /*
1878 * See if we found an interface, and confirm that it
1879 * supports multicast.
1880 */
1881 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1882 error = EADDRNOTAVAIL;
1883 splx(s);
1884 break;
1885 }
1886 /*
1887 * See if the membership already exists or if all the
1888 * membership slots are full.
1889 */
1890 imo = ip_findmoptions(inp);
1891 for (i = 0; i < imo->imo_num_memberships; ++i) {
1892 if (imo->imo_membership[i]->inm_ifp == ifp &&
1893 imo->imo_membership[i]->inm_addr.s_addr
1894 == mreq.imr_multiaddr.s_addr)
1895 break;
1896 }
1897 if (i < imo->imo_num_memberships) {
1898 INP_UNLOCK(inp);
1899 error = EADDRINUSE;
1900 splx(s);
1901 break;
1902 }
1903 if (i == IP_MAX_MEMBERSHIPS) {
1904 INP_UNLOCK(inp);
1905 error = ETOOMANYREFS;
1906 splx(s);
1907 break;
1908 }
1909 /*
1910 * Everything looks good; add a new record to the multicast
1911 * address list for the given interface.
1912 */
1913 if ((imo->imo_membership[i] =
1914 in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) {
1915 INP_UNLOCK(inp);
1916 error = ENOBUFS;
1917 splx(s);
1918 break;
1919 }
1920 ++imo->imo_num_memberships;
1921 INP_UNLOCK(inp);
1922 splx(s);
1923 break;
1924
1925 case IP_DROP_MEMBERSHIP:
1926 /*
1927 * Drop a multicast group membership.
1928 * Group must be a valid IP multicast address.
1929 */
1930 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1931 if (error)
1932 break;
1933
1934 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1935 error = EINVAL;
1936 break;
1937 }
1938
1939 s = splimp();
1940 /*
1941 * If an interface address was specified, get a pointer
1942 * to its ifnet structure.
1943 */
1944 if (mreq.imr_interface.s_addr == INADDR_ANY)
1945 ifp = NULL;
1946 else {
1947 ifp = ip_multicast_if(&mreq.imr_interface, NULL);
1948 if (ifp == NULL) {
1949 error = EADDRNOTAVAIL;
1950 splx(s);
1951 break;
1952 }
1953 }
1954 /*
1955 * Find the membership in the membership array.
1956 */
1957 imo = ip_findmoptions(inp);
1958 for (i = 0; i < imo->imo_num_memberships; ++i) {
1959 if ((ifp == NULL ||
1960 imo->imo_membership[i]->inm_ifp == ifp) &&
1961 imo->imo_membership[i]->inm_addr.s_addr ==
1962 mreq.imr_multiaddr.s_addr)
1963 break;
1964 }
1965 if (i == imo->imo_num_memberships) {
1966 INP_UNLOCK(inp);
1967 error = EADDRNOTAVAIL;
1968 splx(s);
1969 break;
1970 }
1971 /*
1972 * Give up the multicast address record to which the
1973 * membership points.
1974 */
1975 in_delmulti(imo->imo_membership[i]);
1976 /*
1977 * Remove the gap in the membership array.
1978 */
1979 for (++i; i < imo->imo_num_memberships; ++i)
1980 imo->imo_membership[i-1] = imo->imo_membership[i];
1981 --imo->imo_num_memberships;
1982 INP_UNLOCK(inp);
1983 splx(s);
1984 break;
1985
1986 default:
1987 error = EOPNOTSUPP;
1988 break;
1989 }
1990
1991 return (error);
1992 }
1993
1994 /*
1995 * Return the IP multicast options in response to user getsockopt().
1996 */
1997 static int
1998 ip_getmoptions(struct inpcb *inp, struct sockopt *sopt)
1999 {
2000 struct ip_moptions *imo;
2001 struct in_addr addr;
2002 struct in_ifaddr *ia;
2003 int error, optval;
2004 u_char coptval;
2005
2006 INP_LOCK(inp);
2007 imo = inp->inp_moptions;
2008 /*
2009 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
2010 * or is a divert socket, reject it.
2011 */
2012 if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
2013 (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
2014 inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) {
2015 INP_UNLOCK(inp);
2016 return (EOPNOTSUPP);
2017 }
2018
2019 error = 0;
2020 switch (sopt->sopt_name) {
2021 case IP_MULTICAST_VIF:
2022 if (imo != NULL)
2023 optval = imo->imo_multicast_vif;
2024 else
2025 optval = -1;
2026 INP_UNLOCK(inp);
2027 error = sooptcopyout(sopt, &optval, sizeof optval);
2028 break;
2029
2030 case IP_MULTICAST_IF:
2031 if (imo == NULL || imo->imo_multicast_ifp == NULL)
2032 addr.s_addr = INADDR_ANY;
2033 else if (imo->imo_multicast_addr.s_addr) {
2034 /* return the value user has set */
2035 addr = imo->imo_multicast_addr;
2036 } else {
2037 IFP_TO_IA(imo->imo_multicast_ifp, ia);
2038 addr.s_addr = (ia == NULL) ? INADDR_ANY
2039 : IA_SIN(ia)->sin_addr.s_addr;
2040 }
2041 INP_UNLOCK(inp);
2042 error = sooptcopyout(sopt, &addr, sizeof addr);
2043 break;
2044
2045 case IP_MULTICAST_TTL:
2046 if (imo == 0)
2047 optval = coptval = IP_DEFAULT_MULTICAST_TTL;
2048 else
2049 optval = coptval = imo->imo_multicast_ttl;
2050 INP_UNLOCK(inp);
2051 if (sopt->sopt_valsize == 1)
2052 error = sooptcopyout(sopt, &coptval, 1);
2053 else
2054 error = sooptcopyout(sopt, &optval, sizeof optval);
2055 break;
2056
2057 case IP_MULTICAST_LOOP:
2058 if (imo == 0)
2059 optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
2060 else
2061 optval = coptval = imo->imo_multicast_loop;
2062 INP_UNLOCK(inp);
2063 if (sopt->sopt_valsize == 1)
2064 error = sooptcopyout(sopt, &coptval, 1);
2065 else
2066 error = sooptcopyout(sopt, &optval, sizeof optval);
2067 break;
2068
2069 default:
2070 INP_UNLOCK(inp);
2071 error = ENOPROTOOPT;
2072 break;
2073 }
2074 INP_UNLOCK_ASSERT(inp);
2075
2076 return (error);
2077 }
2078
2079 /*
2080 * Discard the IP multicast options.
2081 */
2082 void
2083 ip_freemoptions(imo)
2084 register struct ip_moptions *imo;
2085 {
2086 register int i;
2087
2088 if (imo != NULL) {
2089 for (i = 0; i < imo->imo_num_memberships; ++i)
2090 in_delmulti(imo->imo_membership[i]);
2091 free(imo, M_IPMOPTS);
2092 }
2093 }
2094
2095 /*
2096 * Routine called from ip_output() to loop back a copy of an IP multicast
2097 * packet to the input queue of a specified interface. Note that this
2098 * calls the output routine of the loopback "driver", but with an interface
2099 * pointer that might NOT be a loopback interface -- evil, but easier than
2100 * replicating that code here.
2101 */
2102 static void
2103 ip_mloopback(ifp, m, dst, hlen)
2104 struct ifnet *ifp;
2105 register struct mbuf *m;
2106 register struct sockaddr_in *dst;
2107 int hlen;
2108 {
2109 register struct ip *ip;
2110 struct mbuf *copym;
2111
2112 copym = m_copy(m, 0, M_COPYALL);
2113 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
2114 copym = m_pullup(copym, hlen);
2115 if (copym != NULL) {
2116 /* If needed, compute the checksum and mark it as valid. */
2117 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
2118 in_delayed_cksum(copym);
2119 copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
2120 copym->m_pkthdr.csum_flags |=
2121 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
2122 copym->m_pkthdr.csum_data = 0xffff;
2123 }
2124 /*
2125 * We don't bother to fragment if the IP length is greater
2126 * than the interface's MTU. Can this possibly matter?
2127 */
2128 ip = mtod(copym, struct ip *);
2129 ip->ip_len = htons(ip->ip_len);
2130 ip->ip_off = htons(ip->ip_off);
2131 ip->ip_sum = 0;
2132 ip->ip_sum = in_cksum(copym, hlen);
2133 /*
2134 * NB:
2135 * It's not clear whether there are any lingering
2136 * reentrancy problems in other areas which might
2137 * be exposed by using ip_input directly (in
2138 * particular, everything which modifies the packet
2139 * in-place). Yet another option is using the
2140 * protosw directly to deliver the looped back
2141 * packet. For the moment, we'll err on the side
2142 * of safety by using if_simloop().
2143 */
2144 #if 1 /* XXX */
2145 if (dst->sin_family != AF_INET) {
2146 printf("ip_mloopback: bad address family %d\n",
2147 dst->sin_family);
2148 dst->sin_family = AF_INET;
2149 }
2150 #endif
2151
2152 #ifdef notdef
2153 copym->m_pkthdr.rcvif = ifp;
2154 ip_input(copym);
2155 #else
2156 if_simloop(ifp, copym, dst->sin_family, 0);
2157 #endif
2158 }
2159 }
Cache object: 01d2c63dca7e5e5466401303d62b25f2
|