1 /*-
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
30 * $FreeBSD: releng/6.1/sys/netinet/ip_output.c 161685 2006-08-28 07:31:11Z cperciva $
31 */
32
33 #include "opt_ipfw.h"
34 #include "opt_ipsec.h"
35 #include "opt_mac.h"
36 #include "opt_mbuf_stress_test.h"
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/kernel.h>
41 #include <sys/mac.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/protosw.h>
45 #include <sys/socket.h>
46 #include <sys/socketvar.h>
47 #include <sys/sysctl.h>
48
49 #include <net/if.h>
50 #include <net/netisr.h>
51 #include <net/pfil.h>
52 #include <net/route.h>
53
54 #include <netinet/in.h>
55 #include <netinet/in_systm.h>
56 #include <netinet/ip.h>
57 #include <netinet/in_pcb.h>
58 #include <netinet/in_var.h>
59 #include <netinet/ip_var.h>
60
61 #include <machine/in_cksum.h>
62
/*
 * Malloc type M_IPMOPTS, short name "ip_moptions", described as
 * "internet multicast options".  File-local (static).
 */
static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options");
64
65 #ifdef IPSEC
66 #include <netinet6/ipsec.h>
67 #include <netkey/key.h>
68 #ifdef IPSEC_DEBUG
69 #include <netkey/key_debug.h>
70 #else
71 #define KEYDEBUG(lev,arg)
72 #endif
73 #endif /*IPSEC*/
74
75 #ifdef FAST_IPSEC
76 #include <netipsec/ipsec.h>
77 #include <netipsec/xform.h>
78 #include <netipsec/key.h>
79 #endif /*FAST_IPSEC*/
80
/*
 * print_ip(x, a, y): printf() the string x, then the IPv4 address a as a
 * dotted quad, then the string y.  a must be an object with an s_addr
 * member; s_addr is converted with ntohl() before the four octets are
 * extracted.
 *
 * NOTE(review): the expansion already ends in ';' and the arguments are
 * not parenthesized, so a use followed by its own ';' expands to two
 * statements (a problem in an unbraced if/else), and a is evaluated four
 * times.  Left unchanged here because callers are not visible.
 */
#define print_ip(x, a, y)	 printf("%s %d.%d.%d.%d%s",\
				x, (ntohl(a.s_addr)>>24)&0xFF,\
				  (ntohl(a.s_addr)>>16)&0xFF,\
				  (ntohl(a.s_addr)>>8)&0xFF,\
				  (ntohl(a.s_addr))&0xFF, y);
86
/*
 * NOTE(review): presumably the global IP identification counter; the
 * visible part of this file obtains IDs through ip_newid() and never
 * touches ip_id directly -- confirm against its other users.
 */
u_short ip_id;

#ifdef MBUF_STRESS_TEST
/*
 * Stress-test knob exported as the read/write sysctl
 * net.inet.ip.mbuf_frag_size.  When non-zero, ip_output() passes packets
 * whose m_pkthdr.len exceeds this value through m_fragment() before
 * handing them to the interface.  Default 0 (disabled).
 */
int mbuf_frag_size = 0;
SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW,
	&mbuf_frag_size, 0, "Fragment outgoing mbufs to this size");
#endif

/* Forward declarations of the file-local helpers. */
static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
static struct ifnet *ip_multicast_if(struct in_addr *, int *);
static void	ip_mloopback
	(struct ifnet *, struct mbuf *, struct sockaddr_in *, int);
static int	ip_getmoptions(struct inpcb *, struct sockopt *);
static int	ip_pcbopts(struct inpcb *, int, struct mbuf *);
static int	ip_setmoptions(struct inpcb *, struct sockopt *);
static struct ip_moptions	*ip_findmoptions(struct inpcb *inp);

/* Non-static: copies the options that must be replicated into fragments. */
int	ip_optcopy(struct ip *, struct ip *);


/* Protocol switch table, defined outside this file. */
extern	struct protosw inetsw[];
108
109 /*
110 * IP output. The packet in mbuf chain m contains a skeletal IP
111 * header (with len, off, ttl, proto, tos, src, dst).
112 * The mbuf chain containing the packet will be freed.
113 * The mbuf opt, if present, will not be freed.
114 * In the IP forwarding case, the packet will arrive with options already
115 * inserted, so must have a NULL opt pointer.
116 */
117 int
118 ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro,
119 int flags, struct ip_moptions *imo, struct inpcb *inp)
120 {
121 struct ip *ip;
122 struct ifnet *ifp = NULL; /* keep compiler happy */
123 struct mbuf *m0;
124 int hlen = sizeof (struct ip);
125 int len, error = 0;
126 struct sockaddr_in *dst = NULL; /* keep compiler happy */
127 struct in_ifaddr *ia = NULL;
128 int isbroadcast, sw_csum;
129 struct route iproute;
130 struct in_addr odst;
131 #ifdef IPFIREWALL_FORWARD
132 struct m_tag *fwd_tag = NULL;
133 #endif
134 #ifdef IPSEC
135 struct secpolicy *sp = NULL;
136 #endif
137 #ifdef FAST_IPSEC
138 struct secpolicy *sp = NULL;
139 struct tdb_ident *tdbi;
140 struct m_tag *mtag;
141 int s;
142 #endif /* FAST_IPSEC */
143
144 M_ASSERTPKTHDR(m);
145
146 if (ro == NULL) {
147 ro = &iproute;
148 bzero(ro, sizeof (*ro));
149 }
150
151 if (inp != NULL)
152 INP_LOCK_ASSERT(inp);
153
154 if (opt) {
155 len = 0;
156 m = ip_insertoptions(m, opt, &len);
157 if (len != 0)
158 hlen = len;
159 }
160 ip = mtod(m, struct ip *);
161
162 /*
163 * Fill in IP header. If we are not allowing fragmentation,
164 * then the ip_id field is meaningless, but we don't set it
165 * to zero. Doing so causes various problems when devices along
166 * the path (routers, load balancers, firewalls, etc.) illegally
167 * disable DF on our packet. Note that a 16-bit counter
168 * will wrap around in less than 10 seconds at 100 Mbit/s on a
169 * medium with MTU 1500. See Steven M. Bellovin, "A Technique
170 * for Counting NATted Hosts", Proc. IMW'02, available at
171 * <http://www.research.att.com/~smb/papers/fnat.pdf>.
172 */
173 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
174 ip->ip_v = IPVERSION;
175 ip->ip_hl = hlen >> 2;
176 ip->ip_id = ip_newid();
177 ipstat.ips_localout++;
178 } else {
179 hlen = ip->ip_hl << 2;
180 }
181
182 dst = (struct sockaddr_in *)&ro->ro_dst;
183 again:
184 /*
185 * If there is a cached route,
186 * check that it is to the same destination
187 * and is still up. If not, free it and try again.
188 * The address family should also be checked in case of sharing the
189 * cache with IPv6.
190 */
191 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
192 dst->sin_family != AF_INET ||
193 dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
194 RTFREE(ro->ro_rt);
195 ro->ro_rt = (struct rtentry *)0;
196 }
197 #ifdef IPFIREWALL_FORWARD
198 if (ro->ro_rt == NULL && fwd_tag == NULL) {
199 #else
200 if (ro->ro_rt == NULL) {
201 #endif
202 bzero(dst, sizeof(*dst));
203 dst->sin_family = AF_INET;
204 dst->sin_len = sizeof(*dst);
205 dst->sin_addr = ip->ip_dst;
206 }
207 /*
208 * If routing to interface only,
209 * short circuit routing lookup.
210 */
211 if (flags & IP_ROUTETOIF) {
212 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL &&
213 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == NULL) {
214 ipstat.ips_noroute++;
215 error = ENETUNREACH;
216 goto bad;
217 }
218 ifp = ia->ia_ifp;
219 ip->ip_ttl = 1;
220 isbroadcast = in_broadcast(dst->sin_addr, ifp);
221 } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
222 imo != NULL && imo->imo_multicast_ifp != NULL) {
223 /*
224 * Bypass the normal routing lookup for multicast
225 * packets if the interface is specified.
226 */
227 ifp = imo->imo_multicast_ifp;
228 IFP_TO_IA(ifp, ia);
229 isbroadcast = 0; /* fool gcc */
230 } else {
231 /*
232 * We want to do any cloning requested by the link layer,
233 * as this is probably required in all cases for correct
234 * operation (as it is for ARP).
235 */
236 if (ro->ro_rt == NULL)
237 rtalloc_ign(ro, 0);
238 if (ro->ro_rt == NULL) {
239 ipstat.ips_noroute++;
240 error = EHOSTUNREACH;
241 goto bad;
242 }
243 ia = ifatoia(ro->ro_rt->rt_ifa);
244 ifp = ro->ro_rt->rt_ifp;
245 ro->ro_rt->rt_rmx.rmx_pksent++;
246 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
247 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
248 if (ro->ro_rt->rt_flags & RTF_HOST)
249 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
250 else
251 isbroadcast = in_broadcast(dst->sin_addr, ifp);
252 }
253 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
254 struct in_multi *inm;
255
256 m->m_flags |= M_MCAST;
257 /*
258 * IP destination address is multicast. Make sure "dst"
259 * still points to the address in "ro". (It may have been
260 * changed to point to a gateway address, above.)
261 */
262 dst = (struct sockaddr_in *)&ro->ro_dst;
263 /*
264 * See if the caller provided any multicast options
265 */
266 if (imo != NULL) {
267 ip->ip_ttl = imo->imo_multicast_ttl;
268 if (imo->imo_multicast_vif != -1)
269 ip->ip_src.s_addr =
270 ip_mcast_src ?
271 ip_mcast_src(imo->imo_multicast_vif) :
272 INADDR_ANY;
273 } else
274 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
275 /*
276 * Confirm that the outgoing interface supports multicast.
277 */
278 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
279 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
280 ipstat.ips_noroute++;
281 error = ENETUNREACH;
282 goto bad;
283 }
284 }
285 /*
286 * If source address not specified yet, use address
287 * of outgoing interface.
288 */
289 if (ip->ip_src.s_addr == INADDR_ANY) {
290 /* Interface may have no addresses. */
291 if (ia != NULL)
292 ip->ip_src = IA_SIN(ia)->sin_addr;
293 }
294
295 IN_MULTI_LOCK();
296 IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
297 if (inm != NULL &&
298 (imo == NULL || imo->imo_multicast_loop)) {
299 IN_MULTI_UNLOCK();
300 /*
301 * If we belong to the destination multicast group
302 * on the outgoing interface, and the caller did not
303 * forbid loopback, loop back a copy.
304 */
305 ip_mloopback(ifp, m, dst, hlen);
306 }
307 else {
308 IN_MULTI_UNLOCK();
309 /*
310 * If we are acting as a multicast router, perform
311 * multicast forwarding as if the packet had just
312 * arrived on the interface to which we are about
313 * to send. The multicast forwarding function
314 * recursively calls this function, using the
315 * IP_FORWARDING flag to prevent infinite recursion.
316 *
317 * Multicasts that are looped back by ip_mloopback(),
318 * above, will be forwarded by the ip_input() routine,
319 * if necessary.
320 */
321 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
322 /*
323 * If rsvp daemon is not running, do not
324 * set ip_moptions. This ensures that the packet
325 * is multicast and not just sent down one link
326 * as prescribed by rsvpd.
327 */
328 if (!rsvp_on)
329 imo = NULL;
330 if (ip_mforward &&
331 ip_mforward(ip, ifp, m, imo) != 0) {
332 m_freem(m);
333 goto done;
334 }
335 }
336 }
337
338 /*
339 * Multicasts with a time-to-live of zero may be looped-
340 * back, above, but must not be transmitted on a network.
341 * Also, multicasts addressed to the loopback interface
342 * are not sent -- the above call to ip_mloopback() will
343 * loop back a copy if this host actually belongs to the
344 * destination group on the loopback interface.
345 */
346 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
347 m_freem(m);
348 goto done;
349 }
350
351 goto sendit;
352 }
353 #ifndef notdef
354 /*
355 * If the source address is not specified yet, use the address
356 * of the outoing interface.
357 */
358 if (ip->ip_src.s_addr == INADDR_ANY) {
359 /* Interface may have no addresses. */
360 if (ia != NULL) {
361 ip->ip_src = IA_SIN(ia)->sin_addr;
362 }
363 }
364 #endif /* notdef */
365 /*
366 * Verify that we have any chance at all of being able to queue the
367 * packet or packet fragments, unless ALTQ is enabled on the given
368 * interface in which case packetdrop should be done by queueing.
369 */
370 #ifdef ALTQ
371 if ((!ALTQ_IS_ENABLED(&ifp->if_snd)) &&
372 ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
373 ifp->if_snd.ifq_maxlen))
374 #else
375 if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
376 ifp->if_snd.ifq_maxlen)
377 #endif /* ALTQ */
378 {
379 error = ENOBUFS;
380 ipstat.ips_odropped++;
381 ifp->if_snd.ifq_drops += (ip->ip_len / ifp->if_mtu + 1);
382 goto bad;
383 }
384
385 /*
386 * Look for broadcast address and
387 * verify user is allowed to send
388 * such a packet.
389 */
390 if (isbroadcast) {
391 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
392 error = EADDRNOTAVAIL;
393 goto bad;
394 }
395 if ((flags & IP_ALLOWBROADCAST) == 0) {
396 error = EACCES;
397 goto bad;
398 }
399 /* don't allow broadcast messages to be fragmented */
400 if (ip->ip_len > ifp->if_mtu) {
401 error = EMSGSIZE;
402 goto bad;
403 }
404 if (flags & IP_SENDONES)
405 ip->ip_dst.s_addr = INADDR_BROADCAST;
406 m->m_flags |= M_BCAST;
407 } else {
408 m->m_flags &= ~M_BCAST;
409 }
410
411 sendit:
412 #ifdef IPSEC
413 /* get SP for this packet */
414 if (inp == NULL)
415 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND,
416 flags, &error);
417 else
418 sp = ipsec4_getpolicybypcb(m, IPSEC_DIR_OUTBOUND, inp, &error);
419
420 if (sp == NULL) {
421 ipsecstat.out_inval++;
422 goto bad;
423 }
424
425 error = 0;
426
427 /* check policy */
428 switch (sp->policy) {
429 case IPSEC_POLICY_DISCARD:
430 /*
431 * This packet is just discarded.
432 */
433 ipsecstat.out_polvio++;
434 goto bad;
435
436 case IPSEC_POLICY_BYPASS:
437 case IPSEC_POLICY_NONE:
438 case IPSEC_POLICY_TCP:
439 /* no need to do IPsec. */
440 goto skip_ipsec;
441
442 case IPSEC_POLICY_IPSEC:
443 if (sp->req == NULL) {
444 /* acquire a policy */
445 error = key_spdacquire(sp);
446 goto bad;
447 }
448 break;
449
450 case IPSEC_POLICY_ENTRUST:
451 default:
452 printf("ip_output: Invalid policy found. %d\n", sp->policy);
453 }
454 {
455 struct ipsec_output_state state;
456 bzero(&state, sizeof(state));
457 state.m = m;
458 if (flags & IP_ROUTETOIF) {
459 state.ro = &iproute;
460 bzero(&iproute, sizeof(iproute));
461 } else
462 state.ro = ro;
463 state.dst = (struct sockaddr *)dst;
464
465 ip->ip_sum = 0;
466
467 /*
468 * XXX
469 * delayed checksums are not currently compatible with IPsec
470 */
471 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
472 in_delayed_cksum(m);
473 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
474 }
475
476 ip->ip_len = htons(ip->ip_len);
477 ip->ip_off = htons(ip->ip_off);
478
479 error = ipsec4_output(&state, sp, flags);
480
481 m = state.m;
482 if (flags & IP_ROUTETOIF) {
483 /*
484 * if we have tunnel mode SA, we may need to ignore
485 * IP_ROUTETOIF.
486 */
487 if (state.ro != &iproute || state.ro->ro_rt != NULL) {
488 flags &= ~IP_ROUTETOIF;
489 ro = state.ro;
490 }
491 } else
492 ro = state.ro;
493 dst = (struct sockaddr_in *)state.dst;
494 if (error) {
495 /* mbuf is already reclaimed in ipsec4_output. */
496 m = NULL;
497 switch (error) {
498 case EHOSTUNREACH:
499 case ENETUNREACH:
500 case EMSGSIZE:
501 case ENOBUFS:
502 case ENOMEM:
503 break;
504 default:
505 printf("ip4_output (ipsec): error code %d\n", error);
506 /*fall through*/
507 case ENOENT:
508 /* don't show these error codes to the user */
509 error = 0;
510 break;
511 }
512 goto bad;
513 }
514
515 /* be sure to update variables that are affected by ipsec4_output() */
516 ip = mtod(m, struct ip *);
517 hlen = ip->ip_hl << 2;
518 if (ro->ro_rt == NULL) {
519 if ((flags & IP_ROUTETOIF) == 0) {
520 printf("ip_output: "
521 "can't update route after IPsec processing\n");
522 error = EHOSTUNREACH; /*XXX*/
523 goto bad;
524 }
525 } else {
526 if (state.encap) {
527 ia = ifatoia(ro->ro_rt->rt_ifa);
528 ifp = ro->ro_rt->rt_ifp;
529 }
530 }
531 }
532
533 /* make it flipped, again. */
534 ip->ip_len = ntohs(ip->ip_len);
535 ip->ip_off = ntohs(ip->ip_off);
536 skip_ipsec:
537 #endif /*IPSEC*/
538 #ifdef FAST_IPSEC
539 /*
540 * Check the security policy (SP) for the packet and, if
541 * required, do IPsec-related processing. There are two
542 * cases here; the first time a packet is sent through
543 * it will be untagged and handled by ipsec4_checkpolicy.
544 * If the packet is resubmitted to ip_output (e.g. after
545 * AH, ESP, etc. processing), there will be a tag to bypass
546 * the lookup and related policy checking.
547 */
548 mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL);
549 s = splnet();
550 if (mtag != NULL) {
551 tdbi = (struct tdb_ident *)(mtag + 1);
552 sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND);
553 if (sp == NULL)
554 error = -EINVAL; /* force silent drop */
555 m_tag_delete(m, mtag);
556 } else {
557 sp = ipsec4_checkpolicy(m, IPSEC_DIR_OUTBOUND, flags,
558 &error, inp);
559 }
560 /*
561 * There are four return cases:
562 * sp != NULL apply IPsec policy
563 * sp == NULL, error == 0 no IPsec handling needed
564 * sp == NULL, error == -EINVAL discard packet w/o error
565 * sp == NULL, error != 0 discard packet, report error
566 */
567 if (sp != NULL) {
568 /* Loop detection, check if ipsec processing already done */
569 KASSERT(sp->req != NULL, ("ip_output: no ipsec request"));
570 for (mtag = m_tag_first(m); mtag != NULL;
571 mtag = m_tag_next(m, mtag)) {
572 if (mtag->m_tag_cookie != MTAG_ABI_COMPAT)
573 continue;
574 if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE &&
575 mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED)
576 continue;
577 /*
578 * Check if policy has an SA associated with it.
579 * This can happen when an SP has yet to acquire
580 * an SA; e.g. on first reference. If it occurs,
581 * then we let ipsec4_process_packet do its thing.
582 */
583 if (sp->req->sav == NULL)
584 break;
585 tdbi = (struct tdb_ident *)(mtag + 1);
586 if (tdbi->spi == sp->req->sav->spi &&
587 tdbi->proto == sp->req->sav->sah->saidx.proto &&
588 bcmp(&tdbi->dst, &sp->req->sav->sah->saidx.dst,
589 sizeof (union sockaddr_union)) == 0) {
590 /*
591 * No IPsec processing is needed, free
592 * reference to SP.
593 *
594 * NB: null pointer to avoid free at
595 * done: below.
596 */
597 KEY_FREESP(&sp), sp = NULL;
598 splx(s);
599 goto spd_done;
600 }
601 }
602
603 /*
604 * Do delayed checksums now because we send before
605 * this is done in the normal processing path.
606 */
607 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
608 in_delayed_cksum(m);
609 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
610 }
611
612 ip->ip_len = htons(ip->ip_len);
613 ip->ip_off = htons(ip->ip_off);
614
615 /* NB: callee frees mbuf */
616 error = ipsec4_process_packet(m, sp->req, flags, 0);
617 /*
618 * Preserve KAME behaviour: ENOENT can be returned
619 * when an SA acquire is in progress. Don't propagate
620 * this to user-level; it confuses applications.
621 *
622 * XXX this will go away when the SADB is redone.
623 */
624 if (error == ENOENT)
625 error = 0;
626 splx(s);
627 goto done;
628 } else {
629 splx(s);
630
631 if (error != 0) {
632 /*
633 * Hack: -EINVAL is used to signal that a packet
634 * should be silently discarded. This is typically
635 * because we asked key management for an SA and
636 * it was delayed (e.g. kicked up to IKE).
637 */
638 if (error == -EINVAL)
639 error = 0;
640 goto bad;
641 } else {
642 /* No IPsec processing for this packet. */
643 }
644 #ifdef notyet
645 /*
646 * If deferred crypto processing is needed, check that
647 * the interface supports it.
648 */
649 mtag = m_tag_find(m, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL);
650 if (mtag != NULL && (ifp->if_capenable & IFCAP_IPSEC) == 0) {
651 /* notify IPsec to do its own crypto */
652 ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
653 error = EHOSTUNREACH;
654 goto bad;
655 }
656 #endif
657 }
658 spd_done:
659 #endif /* FAST_IPSEC */
660
661 /* Jump over all PFIL processing if hooks are not active. */
662 if (inet_pfil_hook.ph_busy_count == -1)
663 goto passout;
664
665 /* Run through list of hooks for output packets. */
666 odst.s_addr = ip->ip_dst.s_addr;
667 error = pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT, inp);
668 if (error != 0 || m == NULL)
669 goto done;
670
671 ip = mtod(m, struct ip *);
672
673 /* See if destination IP address was changed by packet filter. */
674 if (odst.s_addr != ip->ip_dst.s_addr) {
675 m->m_flags |= M_SKIP_FIREWALL;
676 /* If destination is now ourself drop to ip_input(). */
677 if (in_localip(ip->ip_dst)) {
678 m->m_flags |= M_FASTFWD_OURS;
679 if (m->m_pkthdr.rcvif == NULL)
680 m->m_pkthdr.rcvif = loif;
681 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
682 m->m_pkthdr.csum_flags |=
683 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
684 m->m_pkthdr.csum_data = 0xffff;
685 }
686 m->m_pkthdr.csum_flags |=
687 CSUM_IP_CHECKED | CSUM_IP_VALID;
688
689 error = netisr_queue(NETISR_IP, m);
690 goto done;
691 } else
692 goto again; /* Redo the routing table lookup. */
693 }
694
695 #ifdef IPFIREWALL_FORWARD
696 /* See if local, if yes, send it to netisr with IP_FASTFWD_OURS. */
697 if (m->m_flags & M_FASTFWD_OURS) {
698 if (m->m_pkthdr.rcvif == NULL)
699 m->m_pkthdr.rcvif = loif;
700 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
701 m->m_pkthdr.csum_flags |=
702 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
703 m->m_pkthdr.csum_data = 0xffff;
704 }
705 m->m_pkthdr.csum_flags |=
706 CSUM_IP_CHECKED | CSUM_IP_VALID;
707
708 error = netisr_queue(NETISR_IP, m);
709 goto done;
710 }
711 /* Or forward to some other address? */
712 fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
713 if (fwd_tag) {
714 #ifndef IPFIREWALL_FORWARD_EXTENDED
715 if (!in_localip(ip->ip_src) && !in_localaddr(ip->ip_dst)) {
716 #endif
717 dst = (struct sockaddr_in *)&ro->ro_dst;
718 bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in));
719 m->m_flags |= M_SKIP_FIREWALL;
720 m_tag_delete(m, fwd_tag);
721 goto again;
722 #ifndef IPFIREWALL_FORWARD_EXTENDED
723 } else {
724 m_tag_delete(m, fwd_tag);
725 /* Continue. */
726 }
727 #endif
728 }
729 #endif /* IPFIREWALL_FORWARD */
730
731 passout:
732 /* 127/8 must not appear on wire - RFC1122. */
733 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
734 (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
735 if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
736 ipstat.ips_badaddr++;
737 error = EADDRNOTAVAIL;
738 goto bad;
739 }
740 }
741
742 m->m_pkthdr.csum_flags |= CSUM_IP;
743 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist;
744 if (sw_csum & CSUM_DELAY_DATA) {
745 in_delayed_cksum(m);
746 sw_csum &= ~CSUM_DELAY_DATA;
747 }
748 m->m_pkthdr.csum_flags &= ifp->if_hwassist;
749
750 /*
751 * If small enough for interface, or the interface will take
752 * care of the fragmentation for us, can just send directly.
753 */
754 if (ip->ip_len <= ifp->if_mtu || (ifp->if_hwassist & CSUM_FRAGMENT &&
755 ((ip->ip_off & IP_DF) == 0))) {
756 ip->ip_len = htons(ip->ip_len);
757 ip->ip_off = htons(ip->ip_off);
758 ip->ip_sum = 0;
759 if (sw_csum & CSUM_DELAY_IP)
760 ip->ip_sum = in_cksum(m, hlen);
761
762 /* Record statistics for this interface address. */
763 if (!(flags & IP_FORWARDING) && ia) {
764 ia->ia_ifa.if_opackets++;
765 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
766 }
767
768 #ifdef IPSEC
769 /* clean ipsec history once it goes out of the node */
770 ipsec_delaux(m);
771 #endif
772
773 #ifdef MBUF_STRESS_TEST
774 if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size)
775 m = m_fragment(m, M_DONTWAIT, mbuf_frag_size);
776 #endif
777 error = (*ifp->if_output)(ifp, m,
778 (struct sockaddr *)dst, ro->ro_rt);
779 goto done;
780 }
781
782 if (ip->ip_off & IP_DF) {
783 error = EMSGSIZE;
784 /*
785 * This case can happen if the user changed the MTU
786 * of an interface after enabling IP on it. Because
787 * most netifs don't keep track of routes pointing to
788 * them, there is no way for one to update all its
789 * routes when the MTU is changed.
790 */
791 if (ro != NULL &&
792 (ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) &&
793 (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
794 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
795 }
796 ipstat.ips_cantfrag++;
797 goto bad;
798 }
799
800 /*
801 * Too large for interface; fragment if possible. If successful,
802 * on return, m will point to a list of packets to be sent.
803 */
804 error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist, sw_csum);
805 if (error)
806 goto bad;
807 for (; m; m = m0) {
808 m0 = m->m_nextpkt;
809 m->m_nextpkt = 0;
810 #ifdef IPSEC
811 /* clean ipsec history once it goes out of the node */
812 ipsec_delaux(m);
813 #endif
814 if (error == 0) {
815 /* Record statistics for this interface address. */
816 if (ia != NULL) {
817 ia->ia_ifa.if_opackets++;
818 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
819 }
820
821 error = (*ifp->if_output)(ifp, m,
822 (struct sockaddr *)dst, ro->ro_rt);
823 } else
824 m_freem(m);
825 }
826
827 if (error == 0)
828 ipstat.ips_fragmented++;
829
830 done:
831 if (ro == &iproute && ro->ro_rt) {
832 RTFREE(ro->ro_rt);
833 }
834 #ifdef IPSEC
835 if (sp != NULL) {
836 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
837 printf("DP ip_output call free SP:%p\n", sp));
838 key_freesp(sp);
839 }
840 #endif
841 #ifdef FAST_IPSEC
842 if (sp != NULL)
843 KEY_FREESP(&sp);
844 #endif
845 return (error);
846 bad:
847 m_freem(m);
848 goto done;
849 }
850
851 /*
852 * Create a chain of fragments which fit the given mtu. m_frag points to the
853 * mbuf to be fragmented; on return it points to the chain with the fragments.
854 * Return 0 if no error. If error, m_frag may contain a partially built
855 * chain of fragments that should be freed by the caller.
856 *
857 * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist)
858 * sw_csum contains the delayed checksums flags (e.g., CSUM_DELAY_IP).
859 */
860 int
861 ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
862 u_long if_hwassist_flags, int sw_csum)
863 {
864 int error = 0;
865 int hlen = ip->ip_hl << 2;
866 int len = (mtu - hlen) & ~7; /* size of payload in each fragment */
867 int off;
868 struct mbuf *m0 = *m_frag; /* the original packet */
869 int firstlen;
870 struct mbuf **mnext;
871 int nfrags;
872
873 if (ip->ip_off & IP_DF) { /* Fragmentation not allowed */
874 ipstat.ips_cantfrag++;
875 return EMSGSIZE;
876 }
877
878 /*
879 * Must be able to put at least 8 bytes per fragment.
880 */
881 if (len < 8)
882 return EMSGSIZE;
883
884 /*
885 * If the interface will not calculate checksums on
886 * fragmented packets, then do it here.
887 */
888 if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
889 (if_hwassist_flags & CSUM_IP_FRAGS) == 0) {
890 in_delayed_cksum(m0);
891 m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
892 }
893
894 if (len > PAGE_SIZE) {
895 /*
896 * Fragment large datagrams such that each segment
897 * contains a multiple of PAGE_SIZE amount of data,
898 * plus headers. This enables a receiver to perform
899 * page-flipping zero-copy optimizations.
900 *
901 * XXX When does this help given that sender and receiver
902 * could have different page sizes, and also mtu could
903 * be less than the receiver's page size ?
904 */
905 int newlen;
906 struct mbuf *m;
907
908 for (m = m0, off = 0; m && (off+m->m_len) <= mtu; m = m->m_next)
909 off += m->m_len;
910
911 /*
912 * firstlen (off - hlen) must be aligned on an
913 * 8-byte boundary
914 */
915 if (off < hlen)
916 goto smart_frag_failure;
917 off = ((off - hlen) & ~7) + hlen;
918 newlen = (~PAGE_MASK) & mtu;
919 if ((newlen + sizeof (struct ip)) > mtu) {
920 /* we failed, go back the default */
921 smart_frag_failure:
922 newlen = len;
923 off = hlen + len;
924 }
925 len = newlen;
926
927 } else {
928 off = hlen + len;
929 }
930
931 firstlen = off - hlen;
932 mnext = &m0->m_nextpkt; /* pointer to next packet */
933
934 /*
935 * Loop through length of segment after first fragment,
936 * make new header and copy data of each part and link onto chain.
937 * Here, m0 is the original packet, m is the fragment being created.
938 * The fragments are linked off the m_nextpkt of the original
939 * packet, which after processing serves as the first fragment.
940 */
941 for (nfrags = 1; off < ip->ip_len; off += len, nfrags++) {
942 struct ip *mhip; /* ip header on the fragment */
943 struct mbuf *m;
944 int mhlen = sizeof (struct ip);
945
946 MGETHDR(m, M_DONTWAIT, MT_HEADER);
947 if (m == NULL) {
948 error = ENOBUFS;
949 ipstat.ips_odropped++;
950 goto done;
951 }
952 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
953 /*
954 * In the first mbuf, leave room for the link header, then
955 * copy the original IP header including options. The payload
956 * goes into an additional mbuf chain returned by m_copy().
957 */
958 m->m_data += max_linkhdr;
959 mhip = mtod(m, struct ip *);
960 *mhip = *ip;
961 if (hlen > sizeof (struct ip)) {
962 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
963 mhip->ip_v = IPVERSION;
964 mhip->ip_hl = mhlen >> 2;
965 }
966 m->m_len = mhlen;
967 /* XXX do we need to add ip->ip_off below ? */
968 mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off;
969 if (off + len >= ip->ip_len) { /* last fragment */
970 len = ip->ip_len - off;
971 m->m_flags |= M_LASTFRAG;
972 } else
973 mhip->ip_off |= IP_MF;
974 mhip->ip_len = htons((u_short)(len + mhlen));
975 m->m_next = m_copy(m0, off, len);
976 if (m->m_next == NULL) { /* copy failed */
977 m_free(m);
978 error = ENOBUFS; /* ??? */
979 ipstat.ips_odropped++;
980 goto done;
981 }
982 m->m_pkthdr.len = mhlen + len;
983 m->m_pkthdr.rcvif = NULL;
984 #ifdef MAC
985 mac_create_fragment(m0, m);
986 #endif
987 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
988 mhip->ip_off = htons(mhip->ip_off);
989 mhip->ip_sum = 0;
990 if (sw_csum & CSUM_DELAY_IP)
991 mhip->ip_sum = in_cksum(m, mhlen);
992 *mnext = m;
993 mnext = &m->m_nextpkt;
994 }
995 ipstat.ips_ofragments += nfrags;
996
997 /* set first marker for fragment chain */
998 m0->m_flags |= M_FIRSTFRAG | M_FRAG;
999 m0->m_pkthdr.csum_data = nfrags;
1000
1001 /*
1002 * Update first fragment by trimming what's been copied out
1003 * and updating header.
1004 */
1005 m_adj(m0, hlen + firstlen - ip->ip_len);
1006 m0->m_pkthdr.len = hlen + firstlen;
1007 ip->ip_len = htons((u_short)m0->m_pkthdr.len);
1008 ip->ip_off |= IP_MF;
1009 ip->ip_off = htons(ip->ip_off);
1010 ip->ip_sum = 0;
1011 if (sw_csum & CSUM_DELAY_IP)
1012 ip->ip_sum = in_cksum(m0, hlen);
1013
1014 done:
1015 *m_frag = m0;
1016 return error;
1017 }
1018
1019 void
1020 in_delayed_cksum(struct mbuf *m)
1021 {
1022 struct ip *ip;
1023 u_short csum, offset;
1024
1025 ip = mtod(m, struct ip *);
1026 offset = ip->ip_hl << 2 ;
1027 csum = in_cksum_skip(m, ip->ip_len, offset);
1028 if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
1029 csum = 0xffff;
1030 offset += m->m_pkthdr.csum_data; /* checksum offset */
1031
1032 if (offset + sizeof(u_short) > m->m_len) {
1033 printf("delayed m_pullup, m->len: %d off: %d p: %d\n",
1034 m->m_len, offset, ip->ip_p);
1035 /*
1036 * XXX
1037 * this shouldn't happen, but if it does, the
1038 * correct behavior may be to insert the checksum
1039 * in the appropriate next mbuf in the chain.
1040 */
1041 return;
1042 }
1043 *(u_short *)(m->m_data + offset) = csum;
1044 }
1045
1046 /*
1047 * Insert IP options into preformed packet.
1048 * Adjust IP destination as required for IP source routing,
1049 * as indicated by a non-zero in_addr at the start of the options.
1050 *
1051 * XXX This routine assumes that the packet has no options in place.
1052 */
1053 static struct mbuf *
1054 ip_insertoptions(m, opt, phlen)
1055 register struct mbuf *m;
1056 struct mbuf *opt;
1057 int *phlen;
1058 {
1059 register struct ipoption *p = mtod(opt, struct ipoption *);
1060 struct mbuf *n;
1061 register struct ip *ip = mtod(m, struct ip *);
1062 unsigned optlen;
1063
1064 optlen = opt->m_len - sizeof(p->ipopt_dst);
1065 if (optlen + ip->ip_len > IP_MAXPACKET) {
1066 *phlen = 0;
1067 return (m); /* XXX should fail */
1068 }
1069 if (p->ipopt_dst.s_addr)
1070 ip->ip_dst = p->ipopt_dst;
1071 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
1072 MGETHDR(n, M_DONTWAIT, MT_HEADER);
1073 if (n == NULL) {
1074 *phlen = 0;
1075 return (m);
1076 }
1077 M_MOVE_PKTHDR(n, m);
1078 n->m_pkthdr.rcvif = NULL;
1079 #ifdef MAC
1080 mac_copy_mbuf(m, n);
1081 #endif
1082 n->m_pkthdr.len += optlen;
1083 m->m_len -= sizeof(struct ip);
1084 m->m_data += sizeof(struct ip);
1085 n->m_next = m;
1086 m = n;
1087 m->m_len = optlen + sizeof(struct ip);
1088 m->m_data += max_linkhdr;
1089 bcopy(ip, mtod(m, void *), sizeof(struct ip));
1090 } else {
1091 m->m_data -= optlen;
1092 m->m_len += optlen;
1093 m->m_pkthdr.len += optlen;
1094 bcopy(ip, mtod(m, void *), sizeof(struct ip));
1095 }
1096 ip = mtod(m, struct ip *);
1097 bcopy(p->ipopt_list, ip + 1, optlen);
1098 *phlen = sizeof(struct ip) + optlen;
1099 ip->ip_v = IPVERSION;
1100 ip->ip_hl = *phlen >> 2;
1101 ip->ip_len += optlen;
1102 return (m);
1103 }
1104
1105 /*
1106 * Copy options from ip to jp,
1107 * omitting those not copied during fragmentation.
1108 */
1109 int
1110 ip_optcopy(ip, jp)
1111 struct ip *ip, *jp;
1112 {
1113 register u_char *cp, *dp;
1114 int opt, optlen, cnt;
1115
1116 cp = (u_char *)(ip + 1);
1117 dp = (u_char *)(jp + 1);
1118 cnt = (ip->ip_hl << 2) - sizeof (struct ip);
1119 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1120 opt = cp[0];
1121 if (opt == IPOPT_EOL)
1122 break;
1123 if (opt == IPOPT_NOP) {
1124 /* Preserve for IP mcast tunnel's LSRR alignment. */
1125 *dp++ = IPOPT_NOP;
1126 optlen = 1;
1127 continue;
1128 }
1129
1130 KASSERT(cnt >= IPOPT_OLEN + sizeof(*cp),
1131 ("ip_optcopy: malformed ipv4 option"));
1132 optlen = cp[IPOPT_OLEN];
1133 KASSERT(optlen >= IPOPT_OLEN + sizeof(*cp) && optlen <= cnt,
1134 ("ip_optcopy: malformed ipv4 option"));
1135
1136 /* bogus lengths should have been caught by ip_dooptions */
1137 if (optlen > cnt)
1138 optlen = cnt;
1139 if (IPOPT_COPIED(opt)) {
1140 bcopy(cp, dp, optlen);
1141 dp += optlen;
1142 }
1143 }
1144 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
1145 *dp++ = IPOPT_EOL;
1146 return (optlen);
1147 }
1148
1149 /*
1150 * IP socket option processing.
1151 */
1152 int
1153 ip_ctloutput(so, sopt)
1154 struct socket *so;
1155 struct sockopt *sopt;
1156 {
1157 struct inpcb *inp = sotoinpcb(so);
1158 int error, optval;
1159
1160 error = optval = 0;
1161 if (sopt->sopt_level != IPPROTO_IP) {
1162 return (EINVAL);
1163 }
1164
1165 if (inp == NULL)
1166 return (EINVAL);
1167
1168 switch (sopt->sopt_dir) {
1169 case SOPT_SET:
1170 switch (sopt->sopt_name) {
1171 case IP_OPTIONS:
1172 #ifdef notyet
1173 case IP_RETOPTS:
1174 #endif
1175 {
1176 struct mbuf *m;
1177 if (sopt->sopt_valsize > MLEN) {
1178 error = EMSGSIZE;
1179 break;
1180 }
1181 MGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT, MT_DATA);
1182 if (m == NULL) {
1183 error = ENOBUFS;
1184 break;
1185 }
1186 m->m_len = sopt->sopt_valsize;
1187 error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
1188 m->m_len);
1189 INP_LOCK(inp);
1190 error = ip_pcbopts(inp, sopt->sopt_name, m);
1191 INP_UNLOCK(inp);
1192 return (error);
1193 }
1194
1195 case IP_TOS:
1196 case IP_TTL:
1197 case IP_MINTTL:
1198 case IP_RECVOPTS:
1199 case IP_RECVRETOPTS:
1200 case IP_RECVDSTADDR:
1201 case IP_RECVTTL:
1202 case IP_RECVIF:
1203 case IP_FAITH:
1204 case IP_ONESBCAST:
1205 case IP_DONTFRAG:
1206 error = sooptcopyin(sopt, &optval, sizeof optval,
1207 sizeof optval);
1208 if (error)
1209 break;
1210
1211 switch (sopt->sopt_name) {
1212 case IP_TOS:
1213 inp->inp_ip_tos = optval;
1214 break;
1215
1216 case IP_TTL:
1217 inp->inp_ip_ttl = optval;
1218 break;
1219
1220 case IP_MINTTL:
1221 if (optval > 0 && optval <= MAXTTL)
1222 inp->inp_ip_minttl = optval;
1223 else
1224 error = EINVAL;
1225 break;
1226
1227 #define OPTSET(bit) do { \
1228 INP_LOCK(inp); \
1229 if (optval) \
1230 inp->inp_flags |= bit; \
1231 else \
1232 inp->inp_flags &= ~bit; \
1233 INP_UNLOCK(inp); \
1234 } while (0)
1235
1236 case IP_RECVOPTS:
1237 OPTSET(INP_RECVOPTS);
1238 break;
1239
1240 case IP_RECVRETOPTS:
1241 OPTSET(INP_RECVRETOPTS);
1242 break;
1243
1244 case IP_RECVDSTADDR:
1245 OPTSET(INP_RECVDSTADDR);
1246 break;
1247
1248 case IP_RECVTTL:
1249 OPTSET(INP_RECVTTL);
1250 break;
1251
1252 case IP_RECVIF:
1253 OPTSET(INP_RECVIF);
1254 break;
1255
1256 case IP_FAITH:
1257 OPTSET(INP_FAITH);
1258 break;
1259
1260 case IP_ONESBCAST:
1261 OPTSET(INP_ONESBCAST);
1262 break;
1263 case IP_DONTFRAG:
1264 OPTSET(INP_DONTFRAG);
1265 break;
1266 }
1267 break;
1268 #undef OPTSET
1269
1270 case IP_MULTICAST_IF:
1271 case IP_MULTICAST_VIF:
1272 case IP_MULTICAST_TTL:
1273 case IP_MULTICAST_LOOP:
1274 case IP_ADD_MEMBERSHIP:
1275 case IP_DROP_MEMBERSHIP:
1276 error = ip_setmoptions(inp, sopt);
1277 break;
1278
1279 case IP_PORTRANGE:
1280 error = sooptcopyin(sopt, &optval, sizeof optval,
1281 sizeof optval);
1282 if (error)
1283 break;
1284
1285 INP_LOCK(inp);
1286 switch (optval) {
1287 case IP_PORTRANGE_DEFAULT:
1288 inp->inp_flags &= ~(INP_LOWPORT);
1289 inp->inp_flags &= ~(INP_HIGHPORT);
1290 break;
1291
1292 case IP_PORTRANGE_HIGH:
1293 inp->inp_flags &= ~(INP_LOWPORT);
1294 inp->inp_flags |= INP_HIGHPORT;
1295 break;
1296
1297 case IP_PORTRANGE_LOW:
1298 inp->inp_flags &= ~(INP_HIGHPORT);
1299 inp->inp_flags |= INP_LOWPORT;
1300 break;
1301
1302 default:
1303 error = EINVAL;
1304 break;
1305 }
1306 INP_UNLOCK(inp);
1307 break;
1308
1309 #if defined(IPSEC) || defined(FAST_IPSEC)
1310 case IP_IPSEC_POLICY:
1311 {
1312 caddr_t req;
1313 size_t len = 0;
1314 int priv;
1315 struct mbuf *m;
1316 int optname;
1317
1318 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1319 break;
1320 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1321 break;
1322 priv = (sopt->sopt_td != NULL &&
1323 suser(sopt->sopt_td) != 0) ? 0 : 1;
1324 req = mtod(m, caddr_t);
1325 len = m->m_len;
1326 optname = sopt->sopt_name;
1327 error = ipsec4_set_policy(inp, optname, req, len, priv);
1328 m_freem(m);
1329 break;
1330 }
1331 #endif /*IPSEC*/
1332
1333 default:
1334 error = ENOPROTOOPT;
1335 break;
1336 }
1337 break;
1338
1339 case SOPT_GET:
1340 switch (sopt->sopt_name) {
1341 case IP_OPTIONS:
1342 case IP_RETOPTS:
1343 if (inp->inp_options)
1344 error = sooptcopyout(sopt,
1345 mtod(inp->inp_options,
1346 char *),
1347 inp->inp_options->m_len);
1348 else
1349 sopt->sopt_valsize = 0;
1350 break;
1351
1352 case IP_TOS:
1353 case IP_TTL:
1354 case IP_MINTTL:
1355 case IP_RECVOPTS:
1356 case IP_RECVRETOPTS:
1357 case IP_RECVDSTADDR:
1358 case IP_RECVTTL:
1359 case IP_RECVIF:
1360 case IP_PORTRANGE:
1361 case IP_FAITH:
1362 case IP_ONESBCAST:
1363 case IP_DONTFRAG:
1364 switch (sopt->sopt_name) {
1365
1366 case IP_TOS:
1367 optval = inp->inp_ip_tos;
1368 break;
1369
1370 case IP_TTL:
1371 optval = inp->inp_ip_ttl;
1372 break;
1373
1374 case IP_MINTTL:
1375 optval = inp->inp_ip_minttl;
1376 break;
1377
1378 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
1379
1380 case IP_RECVOPTS:
1381 optval = OPTBIT(INP_RECVOPTS);
1382 break;
1383
1384 case IP_RECVRETOPTS:
1385 optval = OPTBIT(INP_RECVRETOPTS);
1386 break;
1387
1388 case IP_RECVDSTADDR:
1389 optval = OPTBIT(INP_RECVDSTADDR);
1390 break;
1391
1392 case IP_RECVTTL:
1393 optval = OPTBIT(INP_RECVTTL);
1394 break;
1395
1396 case IP_RECVIF:
1397 optval = OPTBIT(INP_RECVIF);
1398 break;
1399
1400 case IP_PORTRANGE:
1401 if (inp->inp_flags & INP_HIGHPORT)
1402 optval = IP_PORTRANGE_HIGH;
1403 else if (inp->inp_flags & INP_LOWPORT)
1404 optval = IP_PORTRANGE_LOW;
1405 else
1406 optval = 0;
1407 break;
1408
1409 case IP_FAITH:
1410 optval = OPTBIT(INP_FAITH);
1411 break;
1412
1413 case IP_ONESBCAST:
1414 optval = OPTBIT(INP_ONESBCAST);
1415 break;
1416 case IP_DONTFRAG:
1417 optval = OPTBIT(INP_DONTFRAG);
1418 break;
1419 }
1420 error = sooptcopyout(sopt, &optval, sizeof optval);
1421 break;
1422
1423 case IP_MULTICAST_IF:
1424 case IP_MULTICAST_VIF:
1425 case IP_MULTICAST_TTL:
1426 case IP_MULTICAST_LOOP:
1427 case IP_ADD_MEMBERSHIP:
1428 case IP_DROP_MEMBERSHIP:
1429 error = ip_getmoptions(inp, sopt);
1430 break;
1431
1432 #if defined(IPSEC) || defined(FAST_IPSEC)
1433 case IP_IPSEC_POLICY:
1434 {
1435 struct mbuf *m = NULL;
1436 caddr_t req = NULL;
1437 size_t len = 0;
1438
1439 if (m != 0) {
1440 req = mtod(m, caddr_t);
1441 len = m->m_len;
1442 }
1443 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m);
1444 if (error == 0)
1445 error = soopt_mcopyout(sopt, m); /* XXX */
1446 if (error == 0)
1447 m_freem(m);
1448 break;
1449 }
1450 #endif /*IPSEC*/
1451
1452 default:
1453 error = ENOPROTOOPT;
1454 break;
1455 }
1456 break;
1457 }
1458 return (error);
1459 }
1460
1461 /*
1462 * Set up IP options in pcb for insertion in output packets.
1463 * Store in mbuf with pointer in pcbopt, adding pseudo-option
1464 * with destination address if source routed.
1465 */
1466 static int
1467 ip_pcbopts(struct inpcb *inp, int optname, struct mbuf *m)
1468 {
1469 register int cnt, optlen;
1470 register u_char *cp;
1471 struct mbuf **pcbopt;
1472 u_char opt;
1473
1474 INP_LOCK_ASSERT(inp);
1475
1476 pcbopt = &inp->inp_options;
1477
1478 /* turn off any old options */
1479 if (*pcbopt)
1480 (void)m_free(*pcbopt);
1481 *pcbopt = 0;
1482 if (m == NULL || m->m_len == 0) {
1483 /*
1484 * Only turning off any previous options.
1485 */
1486 if (m != NULL)
1487 (void)m_free(m);
1488 return (0);
1489 }
1490
1491 if (m->m_len % sizeof(int32_t))
1492 goto bad;
1493 /*
1494 * IP first-hop destination address will be stored before
1495 * actual options; move other options back
1496 * and clear it when none present.
1497 */
1498 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
1499 goto bad;
1500 cnt = m->m_len;
1501 m->m_len += sizeof(struct in_addr);
1502 cp = mtod(m, u_char *) + sizeof(struct in_addr);
1503 bcopy(mtod(m, void *), cp, (unsigned)cnt);
1504 bzero(mtod(m, void *), sizeof(struct in_addr));
1505
1506 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1507 opt = cp[IPOPT_OPTVAL];
1508 if (opt == IPOPT_EOL)
1509 break;
1510 if (opt == IPOPT_NOP)
1511 optlen = 1;
1512 else {
1513 if (cnt < IPOPT_OLEN + sizeof(*cp))
1514 goto bad;
1515 optlen = cp[IPOPT_OLEN];
1516 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
1517 goto bad;
1518 }
1519 switch (opt) {
1520
1521 default:
1522 break;
1523
1524 case IPOPT_LSRR:
1525 case IPOPT_SSRR:
1526 /*
1527 * user process specifies route as:
1528 * ->A->B->C->D
1529 * D must be our final destination (but we can't
1530 * check that since we may not have connected yet).
1531 * A is first hop destination, which doesn't appear in
1532 * actual IP option, but is stored before the options.
1533 */
1534 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
1535 goto bad;
1536 m->m_len -= sizeof(struct in_addr);
1537 cnt -= sizeof(struct in_addr);
1538 optlen -= sizeof(struct in_addr);
1539 cp[IPOPT_OLEN] = optlen;
1540 /*
1541 * Move first hop before start of options.
1542 */
1543 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
1544 sizeof(struct in_addr));
1545 /*
1546 * Then copy rest of options back
1547 * to close up the deleted entry.
1548 */
1549 bcopy((&cp[IPOPT_OFFSET+1] + sizeof(struct in_addr)),
1550 &cp[IPOPT_OFFSET+1],
1551 (unsigned)cnt - (IPOPT_MINOFF - 1));
1552 break;
1553 }
1554 }
1555 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
1556 goto bad;
1557 *pcbopt = m;
1558 return (0);
1559
1560 bad:
1561 (void)m_free(m);
1562 return (EINVAL);
1563 }
1564
1565 /*
1566 * XXX
1567 * The whole multicast option thing needs to be re-thought.
1568 * Several of these options are equally applicable to non-multicast
1569 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
1570 * standard option (IP_TTL).
1571 */
1572
1573 /*
1574 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
1575 */
1576 static struct ifnet *
1577 ip_multicast_if(a, ifindexp)
1578 struct in_addr *a;
1579 int *ifindexp;
1580 {
1581 int ifindex;
1582 struct ifnet *ifp;
1583
1584 if (ifindexp)
1585 *ifindexp = 0;
1586 if (ntohl(a->s_addr) >> 24 == 0) {
1587 ifindex = ntohl(a->s_addr) & 0xffffff;
1588 if (ifindex < 0 || if_index < ifindex)
1589 return NULL;
1590 ifp = ifnet_byindex(ifindex);
1591 if (ifindexp)
1592 *ifindexp = ifindex;
1593 } else {
1594 INADDR_TO_IFP(*a, ifp);
1595 }
1596 return ifp;
1597 }
1598
1599 /*
1600 * Given an inpcb, return its multicast options structure pointer. Accepts
1601 * an unlocked inpcb pointer, but will return it locked. May sleep.
1602 */
1603 static struct ip_moptions *
1604 ip_findmoptions(struct inpcb *inp)
1605 {
1606 struct ip_moptions *imo;
1607
1608 INP_LOCK(inp);
1609 if (inp->inp_moptions != NULL)
1610 return (inp->inp_moptions);
1611
1612 INP_UNLOCK(inp);
1613
1614 imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK);
1615
1616 imo->imo_multicast_ifp = NULL;
1617 imo->imo_multicast_addr.s_addr = INADDR_ANY;
1618 imo->imo_multicast_vif = -1;
1619 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1620 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1621 imo->imo_num_memberships = 0;
1622
1623 INP_LOCK(inp);
1624 if (inp->inp_moptions != NULL) {
1625 free(imo, M_IPMOPTS);
1626 return (inp->inp_moptions);
1627 }
1628 inp->inp_moptions = imo;
1629 return (imo);
1630 }
1631
1632 /*
1633 * Set the IP multicast options in response to user setsockopt().
1634 */
1635 static int
1636 ip_setmoptions(struct inpcb *inp, struct sockopt *sopt)
1637 {
1638 int error = 0;
1639 int i;
1640 struct in_addr addr;
1641 struct ip_mreq mreq;
1642 struct ifnet *ifp;
1643 struct ip_moptions *imo;
1644 struct route ro;
1645 struct sockaddr_in *dst;
1646 int ifindex;
1647 int s;
1648
1649 switch (sopt->sopt_name) {
1650 /* store an index number for the vif you wanna use in the send */
1651 case IP_MULTICAST_VIF:
1652 if (legal_vif_num == 0) {
1653 error = EOPNOTSUPP;
1654 break;
1655 }
1656 error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
1657 if (error)
1658 break;
1659 if (!legal_vif_num(i) && (i != -1)) {
1660 error = EINVAL;
1661 break;
1662 }
1663 imo = ip_findmoptions(inp);
1664 imo->imo_multicast_vif = i;
1665 INP_UNLOCK(inp);
1666 break;
1667
1668 case IP_MULTICAST_IF:
1669 /*
1670 * Select the interface for outgoing multicast packets.
1671 */
1672 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
1673 if (error)
1674 break;
1675 /*
1676 * INADDR_ANY is used to remove a previous selection.
1677 * When no interface is selected, a default one is
1678 * chosen every time a multicast packet is sent.
1679 */
1680 imo = ip_findmoptions(inp);
1681 if (addr.s_addr == INADDR_ANY) {
1682 imo->imo_multicast_ifp = NULL;
1683 INP_UNLOCK(inp);
1684 break;
1685 }
1686 /*
1687 * The selected interface is identified by its local
1688 * IP address. Find the interface and confirm that
1689 * it supports multicasting.
1690 */
1691 s = splimp();
1692 ifp = ip_multicast_if(&addr, &ifindex);
1693 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1694 INP_UNLOCK(inp);
1695 splx(s);
1696 error = EADDRNOTAVAIL;
1697 break;
1698 }
1699 imo->imo_multicast_ifp = ifp;
1700 if (ifindex)
1701 imo->imo_multicast_addr = addr;
1702 else
1703 imo->imo_multicast_addr.s_addr = INADDR_ANY;
1704 INP_UNLOCK(inp);
1705 splx(s);
1706 break;
1707
1708 case IP_MULTICAST_TTL:
1709 /*
1710 * Set the IP time-to-live for outgoing multicast packets.
1711 * The original multicast API required a char argument,
1712 * which is inconsistent with the rest of the socket API.
1713 * We allow either a char or an int.
1714 */
1715 if (sopt->sopt_valsize == 1) {
1716 u_char ttl;
1717 error = sooptcopyin(sopt, &ttl, 1, 1);
1718 if (error)
1719 break;
1720 imo = ip_findmoptions(inp);
1721 imo->imo_multicast_ttl = ttl;
1722 INP_UNLOCK(inp);
1723 } else {
1724 u_int ttl;
1725 error = sooptcopyin(sopt, &ttl, sizeof ttl,
1726 sizeof ttl);
1727 if (error)
1728 break;
1729 if (ttl > 255)
1730 error = EINVAL;
1731 else {
1732 imo = ip_findmoptions(inp);
1733 imo->imo_multicast_ttl = ttl;
1734 INP_UNLOCK(inp);
1735 }
1736 }
1737 break;
1738
1739 case IP_MULTICAST_LOOP:
1740 /*
1741 * Set the loopback flag for outgoing multicast packets.
1742 * Must be zero or one. The original multicast API required a
1743 * char argument, which is inconsistent with the rest
1744 * of the socket API. We allow either a char or an int.
1745 */
1746 if (sopt->sopt_valsize == 1) {
1747 u_char loop;
1748 error = sooptcopyin(sopt, &loop, 1, 1);
1749 if (error)
1750 break;
1751 imo = ip_findmoptions(inp);
1752 imo->imo_multicast_loop = !!loop;
1753 INP_UNLOCK(inp);
1754 } else {
1755 u_int loop;
1756 error = sooptcopyin(sopt, &loop, sizeof loop,
1757 sizeof loop);
1758 if (error)
1759 break;
1760 imo = ip_findmoptions(inp);
1761 imo->imo_multicast_loop = !!loop;
1762 INP_UNLOCK(inp);
1763 }
1764 break;
1765
1766 case IP_ADD_MEMBERSHIP:
1767 /*
1768 * Add a multicast group membership.
1769 * Group must be a valid IP multicast address.
1770 */
1771 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1772 if (error)
1773 break;
1774
1775 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1776 error = EINVAL;
1777 break;
1778 }
1779 s = splimp();
1780 /*
1781 * If no interface address was provided, use the interface of
1782 * the route to the given multicast address.
1783 */
1784 if (mreq.imr_interface.s_addr == INADDR_ANY) {
1785 bzero((caddr_t)&ro, sizeof(ro));
1786 dst = (struct sockaddr_in *)&ro.ro_dst;
1787 dst->sin_len = sizeof(*dst);
1788 dst->sin_family = AF_INET;
1789 dst->sin_addr = mreq.imr_multiaddr;
1790 rtalloc_ign(&ro, RTF_CLONING);
1791 if (ro.ro_rt == NULL) {
1792 error = EADDRNOTAVAIL;
1793 splx(s);
1794 break;
1795 }
1796 ifp = ro.ro_rt->rt_ifp;
1797 RTFREE(ro.ro_rt);
1798 }
1799 else {
1800 ifp = ip_multicast_if(&mreq.imr_interface, NULL);
1801 }
1802
1803 /*
1804 * See if we found an interface, and confirm that it
1805 * supports multicast.
1806 */
1807 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1808 error = EADDRNOTAVAIL;
1809 splx(s);
1810 break;
1811 }
1812 /*
1813 * See if the membership already exists or if all the
1814 * membership slots are full.
1815 */
1816 imo = ip_findmoptions(inp);
1817 for (i = 0; i < imo->imo_num_memberships; ++i) {
1818 if (imo->imo_membership[i]->inm_ifp == ifp &&
1819 imo->imo_membership[i]->inm_addr.s_addr
1820 == mreq.imr_multiaddr.s_addr)
1821 break;
1822 }
1823 if (i < imo->imo_num_memberships) {
1824 INP_UNLOCK(inp);
1825 error = EADDRINUSE;
1826 splx(s);
1827 break;
1828 }
1829 if (i == IP_MAX_MEMBERSHIPS) {
1830 INP_UNLOCK(inp);
1831 error = ETOOMANYREFS;
1832 splx(s);
1833 break;
1834 }
1835 /*
1836 * Everything looks good; add a new record to the multicast
1837 * address list for the given interface.
1838 */
1839 if ((imo->imo_membership[i] =
1840 in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) {
1841 INP_UNLOCK(inp);
1842 error = ENOBUFS;
1843 splx(s);
1844 break;
1845 }
1846 ++imo->imo_num_memberships;
1847 INP_UNLOCK(inp);
1848 splx(s);
1849 break;
1850
1851 case IP_DROP_MEMBERSHIP:
1852 /*
1853 * Drop a multicast group membership.
1854 * Group must be a valid IP multicast address.
1855 */
1856 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1857 if (error)
1858 break;
1859
1860 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1861 error = EINVAL;
1862 break;
1863 }
1864
1865 s = splimp();
1866 /*
1867 * If an interface address was specified, get a pointer
1868 * to its ifnet structure.
1869 */
1870 if (mreq.imr_interface.s_addr == INADDR_ANY)
1871 ifp = NULL;
1872 else {
1873 ifp = ip_multicast_if(&mreq.imr_interface, NULL);
1874 if (ifp == NULL) {
1875 error = EADDRNOTAVAIL;
1876 splx(s);
1877 break;
1878 }
1879 }
1880 /*
1881 * Find the membership in the membership array.
1882 */
1883 imo = ip_findmoptions(inp);
1884 for (i = 0; i < imo->imo_num_memberships; ++i) {
1885 if ((ifp == NULL ||
1886 imo->imo_membership[i]->inm_ifp == ifp) &&
1887 imo->imo_membership[i]->inm_addr.s_addr ==
1888 mreq.imr_multiaddr.s_addr)
1889 break;
1890 }
1891 if (i == imo->imo_num_memberships) {
1892 INP_UNLOCK(inp);
1893 error = EADDRNOTAVAIL;
1894 splx(s);
1895 break;
1896 }
1897 /*
1898 * Give up the multicast address record to which the
1899 * membership points.
1900 */
1901 in_delmulti(imo->imo_membership[i]);
1902 /*
1903 * Remove the gap in the membership array.
1904 */
1905 for (++i; i < imo->imo_num_memberships; ++i)
1906 imo->imo_membership[i-1] = imo->imo_membership[i];
1907 --imo->imo_num_memberships;
1908 INP_UNLOCK(inp);
1909 splx(s);
1910 break;
1911
1912 default:
1913 error = EOPNOTSUPP;
1914 break;
1915 }
1916
1917 return (error);
1918 }
1919
1920 /*
1921 * Return the IP multicast options in response to user getsockopt().
1922 */
1923 static int
1924 ip_getmoptions(struct inpcb *inp, struct sockopt *sopt)
1925 {
1926 struct ip_moptions *imo;
1927 struct in_addr addr;
1928 struct in_ifaddr *ia;
1929 int error, optval;
1930 u_char coptval;
1931
1932 INP_LOCK(inp);
1933 imo = inp->inp_moptions;
1934
1935 error = 0;
1936 switch (sopt->sopt_name) {
1937 case IP_MULTICAST_VIF:
1938 if (imo != NULL)
1939 optval = imo->imo_multicast_vif;
1940 else
1941 optval = -1;
1942 INP_UNLOCK(inp);
1943 error = sooptcopyout(sopt, &optval, sizeof optval);
1944 break;
1945
1946 case IP_MULTICAST_IF:
1947 if (imo == NULL || imo->imo_multicast_ifp == NULL)
1948 addr.s_addr = INADDR_ANY;
1949 else if (imo->imo_multicast_addr.s_addr) {
1950 /* return the value user has set */
1951 addr = imo->imo_multicast_addr;
1952 } else {
1953 IFP_TO_IA(imo->imo_multicast_ifp, ia);
1954 addr.s_addr = (ia == NULL) ? INADDR_ANY
1955 : IA_SIN(ia)->sin_addr.s_addr;
1956 }
1957 INP_UNLOCK(inp);
1958 error = sooptcopyout(sopt, &addr, sizeof addr);
1959 break;
1960
1961 case IP_MULTICAST_TTL:
1962 if (imo == 0)
1963 optval = coptval = IP_DEFAULT_MULTICAST_TTL;
1964 else
1965 optval = coptval = imo->imo_multicast_ttl;
1966 INP_UNLOCK(inp);
1967 if (sopt->sopt_valsize == 1)
1968 error = sooptcopyout(sopt, &coptval, 1);
1969 else
1970 error = sooptcopyout(sopt, &optval, sizeof optval);
1971 break;
1972
1973 case IP_MULTICAST_LOOP:
1974 if (imo == 0)
1975 optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
1976 else
1977 optval = coptval = imo->imo_multicast_loop;
1978 INP_UNLOCK(inp);
1979 if (sopt->sopt_valsize == 1)
1980 error = sooptcopyout(sopt, &coptval, 1);
1981 else
1982 error = sooptcopyout(sopt, &optval, sizeof optval);
1983 break;
1984
1985 default:
1986 INP_UNLOCK(inp);
1987 error = ENOPROTOOPT;
1988 break;
1989 }
1990 INP_UNLOCK_ASSERT(inp);
1991
1992 return (error);
1993 }
1994
1995 /*
1996 * Discard the IP multicast options.
1997 */
1998 void
1999 ip_freemoptions(imo)
2000 register struct ip_moptions *imo;
2001 {
2002 register int i;
2003
2004 if (imo != NULL) {
2005 for (i = 0; i < imo->imo_num_memberships; ++i)
2006 in_delmulti(imo->imo_membership[i]);
2007 free(imo, M_IPMOPTS);
2008 }
2009 }
2010
2011 /*
2012 * Routine called from ip_output() to loop back a copy of an IP multicast
2013 * packet to the input queue of a specified interface. Note that this
2014 * calls the output routine of the loopback "driver", but with an interface
2015 * pointer that might NOT be a loopback interface -- evil, but easier than
2016 * replicating that code here.
2017 */
2018 static void
2019 ip_mloopback(ifp, m, dst, hlen)
2020 struct ifnet *ifp;
2021 register struct mbuf *m;
2022 register struct sockaddr_in *dst;
2023 int hlen;
2024 {
2025 register struct ip *ip;
2026 struct mbuf *copym;
2027
2028 copym = m_copy(m, 0, M_COPYALL);
2029 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
2030 copym = m_pullup(copym, hlen);
2031 if (copym != NULL) {
2032 /* If needed, compute the checksum and mark it as valid. */
2033 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
2034 in_delayed_cksum(copym);
2035 copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
2036 copym->m_pkthdr.csum_flags |=
2037 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
2038 copym->m_pkthdr.csum_data = 0xffff;
2039 }
2040 /*
2041 * We don't bother to fragment if the IP length is greater
2042 * than the interface's MTU. Can this possibly matter?
2043 */
2044 ip = mtod(copym, struct ip *);
2045 ip->ip_len = htons(ip->ip_len);
2046 ip->ip_off = htons(ip->ip_off);
2047 ip->ip_sum = 0;
2048 ip->ip_sum = in_cksum(copym, hlen);
2049 /*
2050 * NB:
2051 * It's not clear whether there are any lingering
2052 * reentrancy problems in other areas which might
2053 * be exposed by using ip_input directly (in
2054 * particular, everything which modifies the packet
2055 * in-place). Yet another option is using the
2056 * protosw directly to deliver the looped back
2057 * packet. For the moment, we'll err on the side
2058 * of safety by using if_simloop().
2059 */
2060 #if 1 /* XXX */
2061 if (dst->sin_family != AF_INET) {
2062 printf("ip_mloopback: bad address family %d\n",
2063 dst->sin_family);
2064 dst->sin_family = AF_INET;
2065 }
2066 #endif
2067
2068 #ifdef notdef
2069 copym->m_pkthdr.rcvif = ifp;
2070 ip_input(copym);
2071 #else
2072 if_simloop(ifp, copym, dst->sin_family, 0);
2073 #endif
2074 }
2075 }
Cache object: af27eba39af1ad953a7be259d3b50043
|