1 /*
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
34 * $FreeBSD: src/sys/netinet/ip_output.c,v 1.44.2.17 2000/06/26 20:49:22 alfred Exp $
35 */
36
37 #define _IP_VHL
38
39 #include "opt_ipfw.h"
40
41 #include <sys/param.h>
42 #include <sys/queue.h>
43 #include <sys/systm.h>
44 #include <sys/malloc.h>
45 #include <sys/mbuf.h>
46 #include <sys/errno.h>
47 #include <sys/protosw.h>
48 #include <sys/socket.h>
49 #include <sys/socketvar.h>
50
51 #include <net/if.h>
52 #include <net/route.h>
53
54 #include <netinet/in.h>
55 #include <netinet/in_systm.h>
56 #include <netinet/ip.h>
57 #include <netinet/in_pcb.h>
58 #include <netinet/in_var.h>
59 #include <netinet/ip_var.h>
60
61 #ifdef vax
62 #include <machine/mtpr.h>
63 #endif
64 #include <machine/in_cksum.h>
65
66 #if !defined(COMPAT_IPFW) || COMPAT_IPFW == 1
67 #undef COMPAT_IPFW
68 #define COMPAT_IPFW 1
69 #else
70 #undef COMPAT_IPFW
71 #endif
72
73 #ifdef IPFIREWALL
74 #include <netinet/ip_fw.h>
75 #endif
76
77 #ifdef DUMMYNET
78 #include <netinet/ip_dummynet.h>
79 #endif
80
81 u_short ip_id;
82
83 static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *));
84 static void ip_mloopback
85 __P((struct ifnet *, struct mbuf *, struct sockaddr_in *, int));
86 static int ip_getmoptions
87 __P((int, struct ip_moptions *, struct mbuf **));
88 static int ip_optcopy __P((struct ip *, struct ip *));
89 static int ip_pcbopts __P((struct mbuf **, struct mbuf *));
90 static int ip_setmoptions
91 __P((int, struct ip_moptions **, struct mbuf *));
92
93 extern struct protosw inetsw[];
94
95 /*
96 * IP output. The packet in mbuf chain m contains a skeletal IP
97 * header (with len, off, ttl, proto, tos, src, dst).
98 * The mbuf chain containing the packet will be freed.
99 * The mbuf opt, if present, will not be freed.
100 */
101 int
102 ip_output(m0, opt, ro, flags, imo)
103 struct mbuf *m0;
104 struct mbuf *opt;
105 struct route *ro;
106 int flags;
107 struct ip_moptions *imo;
108 {
109 struct ip *ip, *mhip;
110 struct ifnet *ifp;
111 struct mbuf *m = m0;
112 int hlen = sizeof (struct ip);
113 int len, off, error = 0;
114 struct sockaddr_in *dst;
115 struct in_ifaddr *ia;
116 int isbroadcast;
117
118 #ifndef IPDIVERT /* dummy variable for the firewall code to play with */
119 u_short ip_divert_cookie = 0 ;
120 #endif
121 #ifdef COMPAT_IPFW
122 struct ip_fw_chain *rule = NULL;
123 #endif
124
125 #if defined(IPFIREWALL) && defined (DUMMYNET)
126 /*
127 * dummynet packet are prepended a vestigial mbuf with
128 * m_type = MT_DUMMYNET and m_data pointing to the matching
129 * rule.
130 */
131 if (m->m_type == MT_DUMMYNET) {
132 /*
133 * the packet was already tagged, so part of the
134 * processing was already done, and we need to go down.
135 * opt, flags and imo have already been used, and now
136 * they are used to hold ifp, dst and NULL, respectively.
137 */
138 rule = (struct ip_fw_chain *)(m->m_data) ;
139 m = m->m_next ;
140 m0 = m ;
141 ip = mtod(m, struct ip *);
142 dst = (struct sockaddr_in *)flags ;
143 ifp = (struct ifnet *)opt;
144 hlen = IP_VHL_HL(ip->ip_vhl) << 2 ;
145 opt = NULL ;
146 flags = 0 ; /* XXX is this correct ? */
147 goto sendit;
148 }
149 #endif
150 #ifdef DIAGNOSTIC
151 if ((m->m_flags & M_PKTHDR) == 0)
152 panic("ip_output no HDR");
153 if (!ro)
154 panic("ip_output no route, proto = %d",
155 mtod(m, struct ip *)->ip_p);
156 #endif
157 if (opt) {
158 m = ip_insertoptions(m, opt, &len);
159 hlen = len;
160 }
161 ip = mtod(m, struct ip *);
162 /*
163 * Fill in IP header.
164 */
165 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
166 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
167 ip->ip_off &= IP_DF;
168 ip->ip_id = htons(ip_id++);
169 ipstat.ips_localout++;
170 } else {
171 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
172 }
173
174 dst = (struct sockaddr_in *)&ro->ro_dst;
175 /*
176 * If there is a cached route,
177 * check that it is to the same destination
178 * and is still up. If not, free it and try again.
179 */
180 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
181 dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
182 RTFREE(ro->ro_rt);
183 ro->ro_rt = (struct rtentry *)0;
184 }
185 if (ro->ro_rt == 0) {
186 dst->sin_family = AF_INET;
187 dst->sin_len = sizeof(*dst);
188 dst->sin_addr = ip->ip_dst;
189 }
190 /*
191 * If routing to interface only,
192 * short circuit routing lookup.
193 */
194 #define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
195 #define sintosa(sin) ((struct sockaddr *)(sin))
196 if (flags & IP_ROUTETOIF) {
197 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
198 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
199 ipstat.ips_noroute++;
200 error = ENETUNREACH;
201 goto bad;
202 }
203 ifp = ia->ia_ifp;
204 ip->ip_ttl = 1;
205 isbroadcast = in_broadcast(dst->sin_addr, ifp);
206 } else {
207 /*
208 * If this is the case, we probably don't want to allocate
209 * a protocol-cloned route since we didn't get one from the
210 * ULP. This lets TCP do its thing, while not burdening
211 * forwarding or ICMP with the overhead of cloning a route.
212 * Of course, we still want to do any cloning requested by
213 * the link layer, as this is probably required in all cases
214 * for correct operation (as it is for ARP).
215 */
216 if (ro->ro_rt == 0)
217 rtalloc_ign(ro, RTF_PRCLONING);
218 if (ro->ro_rt == 0) {
219 ipstat.ips_noroute++;
220 error = EHOSTUNREACH;
221 goto bad;
222 }
223 ia = ifatoia(ro->ro_rt->rt_ifa);
224 ifp = ro->ro_rt->rt_ifp;
225 ro->ro_rt->rt_use++;
226 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
227 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
228 if (ro->ro_rt->rt_flags & RTF_HOST)
229 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
230 else
231 isbroadcast = in_broadcast(dst->sin_addr, ifp);
232 }
233 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
234 struct in_multi *inm;
235
236 m->m_flags |= M_MCAST;
237 /*
238 * IP destination address is multicast. Make sure "dst"
239 * still points to the address in "ro". (It may have been
240 * changed to point to a gateway address, above.)
241 */
242 dst = (struct sockaddr_in *)&ro->ro_dst;
243 /*
244 * See if the caller provided any multicast options
245 */
246 if (imo != NULL) {
247 ip->ip_ttl = imo->imo_multicast_ttl;
248 if (imo->imo_multicast_ifp != NULL)
249 ifp = imo->imo_multicast_ifp;
250 if (imo->imo_multicast_vif != -1)
251 ip->ip_src.s_addr =
252 ip_mcast_src(imo->imo_multicast_vif);
253 } else
254 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
255 /*
256 * Confirm that the outgoing interface supports multicast.
257 */
258 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
259 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
260 ipstat.ips_noroute++;
261 error = ENETUNREACH;
262 goto bad;
263 }
264 }
265 /*
266 * If source address not specified yet, use address
267 * of outgoing interface.
268 */
269 if (ip->ip_src.s_addr == INADDR_ANY) {
270 register struct in_ifaddr *ia;
271
272 for (ia = in_ifaddr; ia; ia = ia->ia_next)
273 if (ia->ia_ifp == ifp) {
274 ip->ip_src = IA_SIN(ia)->sin_addr;
275 break;
276 }
277 }
278
279 IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
280 if (inm != NULL &&
281 (imo == NULL || imo->imo_multicast_loop)) {
282 /*
283 * If we belong to the destination multicast group
284 * on the outgoing interface, and the caller did not
285 * forbid loopback, loop back a copy.
286 */
287 ip_mloopback(ifp, m, dst, hlen);
288 }
289 else {
290 /*
291 * If we are acting as a multicast router, perform
292 * multicast forwarding as if the packet had just
293 * arrived on the interface to which we are about
294 * to send. The multicast forwarding function
295 * recursively calls this function, using the
296 * IP_FORWARDING flag to prevent infinite recursion.
297 *
298 * Multicasts that are looped back by ip_mloopback(),
299 * above, will be forwarded by the ip_input() routine,
300 * if necessary.
301 */
302 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
303 /*
304 * Check if rsvp daemon is running. If not, don't
305 * set ip_moptions. This ensures that the packet
306 * is multicast and not just sent down one link
307 * as prescribed by rsvpd.
308 */
309 if (!rsvp_on)
310 imo = NULL;
311 if (ip_mforward(ip, ifp, m, imo) != 0) {
312 m_freem(m);
313 goto done;
314 }
315 }
316 }
317
318 /*
319 * Multicasts with a time-to-live of zero may be looped-
320 * back, above, but must not be transmitted on a network.
321 * Also, multicasts addressed to the loopback interface
322 * are not sent -- the above call to ip_mloopback() will
323 * loop back a copy if this host actually belongs to the
324 * destination group on the loopback interface.
325 */
326 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
327 m_freem(m);
328 goto done;
329 }
330
331 goto sendit;
332 }
333 #ifndef notdef
334 /*
335 * If source address not specified yet, use address
336 * of outgoing interface.
337 */
338 if (ip->ip_src.s_addr == INADDR_ANY)
339 ip->ip_src = IA_SIN(ia)->sin_addr;
340 #endif
341 /*
342 * Verify that we have any chance at all of being able to queue
343 * the packet or packet fragments
344 */
345 if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
346 ifp->if_snd.ifq_maxlen) {
347 error = ENOBUFS;
348 goto bad;
349 }
350
351 /*
352 * Look for broadcast address and
353 * and verify user is allowed to send
354 * such a packet.
355 */
356 if (isbroadcast) {
357 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
358 error = EADDRNOTAVAIL;
359 goto bad;
360 }
361 if ((flags & IP_ALLOWBROADCAST) == 0) {
362 error = EACCES;
363 goto bad;
364 }
365 /* don't allow broadcast messages to be fragmented */
366 if ((u_short)ip->ip_len > ifp->if_mtu) {
367 error = EMSGSIZE;
368 goto bad;
369 }
370 m->m_flags |= M_BCAST;
371 } else {
372 m->m_flags &= ~M_BCAST;
373 }
374
375 sendit:
376 /*
377 * IpHack's section.
378 * - Xlate: translate packet's addr/port (NAT).
379 * - Firewall: deny/allow/etc.
380 * - Wrap: fake packet's addr/port <unimpl.>
381 * - Encapsulate: put it in another IP and send out. <unimp.>
382 */
383
384 #ifdef COMPAT_IPFW
385 if (ip_nat_ptr && !(*ip_nat_ptr)(&ip, &m, ifp, IP_NAT_OUT)) {
386 error = EACCES;
387 goto done;
388 }
389
390 /*
391 * Check with the firewall...
392 */
393 if (ip_fw_chk_ptr) {
394 off=(*ip_fw_chk_ptr)(&ip,hlen,ifp,&ip_divert_cookie,&m,&rule);
395 if (!m) { /* pkt discarded by firewall */
396 error = EACCES;
397 goto done;
398 }
399 if (off) { /* divert, dummynet, etc. */
400 #ifdef DUMMYNET
401 if (off & 0x10000) {
402 /*
403 * pass the pkt to dummynet. Need to include
404 * pipe number, m, ifp, ro, dst because these are
405 * not recomputed in the next pass.
406 * All other parameters have been already used and
407 * so they are not needed anymore.
408 * XXX note: if the ifp or ro entry are deleted
409 * while a pkt is in dummynet, we are in trouble!
410 */
411 dummynet_io(off & 0xffff, DN_TO_IP_OUT, m,ifp,ro,dst,rule);
412 goto done ;
413 }
414 #endif
415 #ifdef IPDIVERT
416 ip_divert_port = off & 0xffff ;
417 if (ip_divert_port) { /* Divert packet */
418 (*inetsw[ip_protox[IPPROTO_DIVERT]].pr_input)(m, 0);
419 goto done;
420 }
421 #endif
422 /* if none of the above matches, we have to drop the pkt */
423 m_freem(m);
424 error = EACCES;
425 goto done;
426 }
427 }
428 #endif /* COMPAT_IPFW */
429
430 /*
431 * If small enough for interface, can just send directly.
432 */
433 if ((u_short)ip->ip_len <= ifp->if_mtu) {
434 ip->ip_len = htons((u_short)ip->ip_len);
435 ip->ip_off = htons((u_short)ip->ip_off);
436 ip->ip_sum = 0;
437 if (ip->ip_vhl == IP_VHL_BORING) {
438 ip->ip_sum = in_cksum_hdr(ip);
439 } else {
440 ip->ip_sum = in_cksum(m, hlen);
441 }
442 error = (*ifp->if_output)(ifp, m,
443 (struct sockaddr *)dst, ro->ro_rt);
444 goto done;
445 }
446 /*
447 * Too large for interface; fragment if possible.
448 * Must be able to put at least 8 bytes per fragment.
449 */
450 if (ip->ip_off & IP_DF) {
451 error = EMSGSIZE;
452 /*
453 * This case can happen if the user changed the MTU
454 * of an interface after enabling IP on it. Because
455 * most netifs don't keep track of routes pointing to
456 * them, there is no way for one to update all its
457 * routes when the MTU is changed.
458 */
459 if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
460 && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
461 && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
462 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
463 }
464 ipstat.ips_cantfrag++;
465 goto bad;
466 }
467 len = (ifp->if_mtu - hlen) &~ 7;
468 if (len < 8) {
469 error = EMSGSIZE;
470 goto bad;
471 }
472
473 {
474 int mhlen, firstlen = len;
475 struct mbuf **mnext = &m->m_nextpkt;
476
477 /*
478 * Loop through length of segment after first fragment,
479 * make new header and copy data of each part and link onto chain.
480 */
481 m0 = m;
482 mhlen = sizeof (struct ip);
483 for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
484 MGETHDR(m, M_DONTWAIT, MT_HEADER);
485 if (m == 0) {
486 error = ENOBUFS;
487 ipstat.ips_odropped++;
488 goto sendorfree;
489 }
490 m->m_flags |= (m0->m_flags & M_MCAST);
491 m->m_data += max_linkhdr;
492 mhip = mtod(m, struct ip *);
493 *mhip = *ip;
494 if (hlen > sizeof (struct ip)) {
495 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
496 mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
497 }
498 m->m_len = mhlen;
499 mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
500 if (ip->ip_off & IP_MF)
501 mhip->ip_off |= IP_MF;
502 if (off + len >= (u_short)ip->ip_len)
503 len = (u_short)ip->ip_len - off;
504 else
505 mhip->ip_off |= IP_MF;
506 mhip->ip_len = htons((u_short)(len + mhlen));
507 m->m_next = m_copy(m0, off, len);
508 if (m->m_next == 0) {
509 (void) m_free(m);
510 error = ENOBUFS; /* ??? */
511 ipstat.ips_odropped++;
512 goto sendorfree;
513 }
514 m->m_pkthdr.len = mhlen + len;
515 m->m_pkthdr.rcvif = (struct ifnet *)0;
516 mhip->ip_off = htons((u_short)mhip->ip_off);
517 mhip->ip_sum = 0;
518 if (mhip->ip_vhl == IP_VHL_BORING) {
519 mhip->ip_sum = in_cksum_hdr(mhip);
520 } else {
521 mhip->ip_sum = in_cksum(m, mhlen);
522 }
523 *mnext = m;
524 mnext = &m->m_nextpkt;
525 ipstat.ips_ofragments++;
526 }
527 /*
528 * Update first fragment by trimming what's been copied out
529 * and updating header, then send each fragment (in order).
530 */
531 m = m0;
532 m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
533 m->m_pkthdr.len = hlen + firstlen;
534 ip->ip_len = htons((u_short)m->m_pkthdr.len);
535 ip->ip_off = htons((u_short)(ip->ip_off | IP_MF));
536 ip->ip_sum = 0;
537 if (ip->ip_vhl == IP_VHL_BORING) {
538 ip->ip_sum = in_cksum_hdr(ip);
539 } else {
540 ip->ip_sum = in_cksum(m, hlen);
541 }
542 sendorfree:
543 for (m = m0; m; m = m0) {
544 m0 = m->m_nextpkt;
545 m->m_nextpkt = 0;
546 if (error == 0)
547 error = (*ifp->if_output)(ifp, m,
548 (struct sockaddr *)dst, ro->ro_rt);
549 else
550 m_freem(m);
551 }
552
553 if (error == 0)
554 ipstat.ips_fragmented++;
555 }
556 done:
557 return (error);
558 bad:
559 m_freem(m0);
560 goto done;
561 }
562
563 /*
564 * Insert IP options into preformed packet.
565 * Adjust IP destination as required for IP source routing,
566 * as indicated by a non-zero in_addr at the start of the options.
567 *
568 * XXX This routine assumes that the packet has no options in place.
569 */
570 static struct mbuf *
571 ip_insertoptions(m, opt, phlen)
572 register struct mbuf *m;
573 struct mbuf *opt;
574 int *phlen;
575 {
576 register struct ipoption *p = mtod(opt, struct ipoption *);
577 struct mbuf *n;
578 register struct ip *ip = mtod(m, struct ip *);
579 unsigned optlen;
580
581 optlen = opt->m_len - sizeof(p->ipopt_dst);
582 if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
583 return (m); /* XXX should fail */
584 if (p->ipopt_dst.s_addr)
585 ip->ip_dst = p->ipopt_dst;
586 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
587 MGETHDR(n, M_DONTWAIT, MT_HEADER);
588 if (n == 0)
589 return (m);
590 n->m_pkthdr.len = m->m_pkthdr.len + optlen;
591 m->m_len -= sizeof(struct ip);
592 m->m_data += sizeof(struct ip);
593 n->m_next = m;
594 m = n;
595 m->m_len = optlen + sizeof(struct ip);
596 m->m_data += max_linkhdr;
597 (void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
598 } else {
599 m->m_data -= optlen;
600 m->m_len += optlen;
601 m->m_pkthdr.len += optlen;
602 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
603 }
604 ip = mtod(m, struct ip *);
605 bcopy(p->ipopt_list, ip + 1, optlen);
606 *phlen = sizeof(struct ip) + optlen;
607 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
608 ip->ip_len += optlen;
609 return (m);
610 }
611
612 /*
613 * Copy options from ip to jp,
614 * omitting those not copied during fragmentation.
615 */
616 static int
617 ip_optcopy(ip, jp)
618 struct ip *ip, *jp;
619 {
620 register u_char *cp, *dp;
621 int opt, optlen, cnt;
622
623 cp = (u_char *)(ip + 1);
624 dp = (u_char *)(jp + 1);
625 cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
626 for (; cnt > 0; cnt -= optlen, cp += optlen) {
627 opt = cp[0];
628 if (opt == IPOPT_EOL)
629 break;
630 if (opt == IPOPT_NOP) {
631 /* Preserve for IP mcast tunnel's LSRR alignment. */
632 *dp++ = IPOPT_NOP;
633 optlen = 1;
634 continue;
635 } else
636 optlen = cp[IPOPT_OLEN];
637 /* bogus lengths should have been caught by ip_dooptions */
638 if (optlen > cnt)
639 optlen = cnt;
640 if (IPOPT_COPIED(opt)) {
641 bcopy(cp, dp, optlen);
642 dp += optlen;
643 }
644 }
645 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
646 *dp++ = IPOPT_EOL;
647 return (optlen);
648 }
649
650 /*
651 * IP socket option processing.
652 */
653 int
654 ip_ctloutput(op, so, level, optname, mp)
655 int op;
656 struct socket *so;
657 int level, optname;
658 struct mbuf **mp;
659 {
660 register struct inpcb *inp = sotoinpcb(so);
661 register struct mbuf *m = *mp;
662 register int optval = 0;
663 int error = 0;
664
665 if (level != IPPROTO_IP) {
666 error = EINVAL;
667 if (op == PRCO_SETOPT && *mp)
668 (void) m_free(*mp);
669 } else switch (op) {
670
671 case PRCO_SETOPT:
672 switch (optname) {
673 case IP_OPTIONS:
674 #ifdef notyet
675 case IP_RETOPTS:
676 return (ip_pcbopts(optname, &inp->inp_options, m));
677 #else
678 return (ip_pcbopts(&inp->inp_options, m));
679 #endif
680
681 case IP_TOS:
682 case IP_TTL:
683 case IP_RECVOPTS:
684 case IP_RECVRETOPTS:
685 case IP_RECVDSTADDR:
686 case IP_RECVIF:
687 if (m == 0 || m->m_len != sizeof(int))
688 error = EINVAL;
689 else {
690 optval = *mtod(m, int *);
691 switch (optname) {
692
693 case IP_TOS:
694 inp->inp_ip_tos = optval;
695 break;
696
697 case IP_TTL:
698 inp->inp_ip_ttl = optval;
699 break;
700 #define OPTSET(bit) \
701 if (optval) \
702 inp->inp_flags |= bit; \
703 else \
704 inp->inp_flags &= ~bit;
705
706 case IP_RECVOPTS:
707 OPTSET(INP_RECVOPTS);
708 break;
709
710 case IP_RECVRETOPTS:
711 OPTSET(INP_RECVRETOPTS);
712 break;
713
714 case IP_RECVDSTADDR:
715 OPTSET(INP_RECVDSTADDR);
716 break;
717
718 case IP_RECVIF:
719 OPTSET(INP_RECVIF);
720 break;
721 }
722 }
723 break;
724 #undef OPTSET
725
726 case IP_MULTICAST_IF:
727 case IP_MULTICAST_VIF:
728 case IP_MULTICAST_TTL:
729 case IP_MULTICAST_LOOP:
730 case IP_ADD_MEMBERSHIP:
731 case IP_DROP_MEMBERSHIP:
732 error = ip_setmoptions(optname, &inp->inp_moptions, m);
733 break;
734
735 case IP_PORTRANGE:
736 if (m == 0 || m->m_len != sizeof(int))
737 error = EINVAL;
738 else {
739 optval = *mtod(m, int *);
740
741 switch (optval) {
742
743 case IP_PORTRANGE_DEFAULT:
744 inp->inp_flags &= ~(INP_LOWPORT);
745 inp->inp_flags &= ~(INP_HIGHPORT);
746 break;
747
748 case IP_PORTRANGE_HIGH:
749 inp->inp_flags &= ~(INP_LOWPORT);
750 inp->inp_flags |= INP_HIGHPORT;
751 break;
752
753 case IP_PORTRANGE_LOW:
754 inp->inp_flags &= ~(INP_HIGHPORT);
755 inp->inp_flags |= INP_LOWPORT;
756 break;
757
758 default:
759 error = EINVAL;
760 break;
761 }
762 }
763 break;
764
765 default:
766 error = ENOPROTOOPT;
767 break;
768 }
769 if (m)
770 (void)m_free(m);
771 break;
772
773 case PRCO_GETOPT:
774 switch (optname) {
775 case IP_OPTIONS:
776 case IP_RETOPTS:
777 *mp = m = m_get(M_WAIT, MT_SOOPTS);
778 if (inp->inp_options) {
779 m->m_len = inp->inp_options->m_len;
780 bcopy(mtod(inp->inp_options, void *),
781 mtod(m, void *), m->m_len);
782 } else
783 m->m_len = 0;
784 break;
785
786 case IP_TOS:
787 case IP_TTL:
788 case IP_RECVOPTS:
789 case IP_RECVRETOPTS:
790 case IP_RECVDSTADDR:
791 case IP_RECVIF:
792 *mp = m = m_get(M_WAIT, MT_SOOPTS);
793 m->m_len = sizeof(int);
794 switch (optname) {
795
796 case IP_TOS:
797 optval = inp->inp_ip_tos;
798 break;
799
800 case IP_TTL:
801 optval = inp->inp_ip_ttl;
802 break;
803
804 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
805
806 case IP_RECVOPTS:
807 optval = OPTBIT(INP_RECVOPTS);
808 break;
809
810 case IP_RECVRETOPTS:
811 optval = OPTBIT(INP_RECVRETOPTS);
812 break;
813
814 case IP_RECVDSTADDR:
815 optval = OPTBIT(INP_RECVDSTADDR);
816 break;
817
818 case IP_RECVIF:
819 optval = OPTBIT(INP_RECVIF);
820 break;
821 }
822 *mtod(m, int *) = optval;
823 break;
824
825 case IP_MULTICAST_IF:
826 case IP_MULTICAST_VIF:
827 case IP_MULTICAST_TTL:
828 case IP_MULTICAST_LOOP:
829 case IP_ADD_MEMBERSHIP:
830 case IP_DROP_MEMBERSHIP:
831 error = ip_getmoptions(optname, inp->inp_moptions, mp);
832 break;
833
834 case IP_PORTRANGE:
835 *mp = m = m_get(M_WAIT, MT_SOOPTS);
836 m->m_len = sizeof(int);
837
838 if (inp->inp_flags & INP_HIGHPORT)
839 optval = IP_PORTRANGE_HIGH;
840 else if (inp->inp_flags & INP_LOWPORT)
841 optval = IP_PORTRANGE_LOW;
842 else
843 optval = 0;
844
845 *mtod(m, int *) = optval;
846 break;
847
848 default:
849 error = ENOPROTOOPT;
850 break;
851 }
852 break;
853 }
854 return (error);
855 }
856
857 /*
858 * Set up IP options in pcb for insertion in output packets.
859 * Store in mbuf with pointer in pcbopt, adding pseudo-option
860 * with destination address if source routed.
861 */
862 static int
863 #ifdef notyet
864 ip_pcbopts(optname, pcbopt, m)
865 int optname;
866 #else
867 ip_pcbopts(pcbopt, m)
868 #endif
869 struct mbuf **pcbopt;
870 register struct mbuf *m;
871 {
872 register cnt, optlen;
873 register u_char *cp;
874 u_char opt;
875
876 /* turn off any old options */
877 if (*pcbopt)
878 (void)m_free(*pcbopt);
879 *pcbopt = 0;
880 if (m == (struct mbuf *)0 || m->m_len == 0) {
881 /*
882 * Only turning off any previous options.
883 */
884 if (m)
885 (void)m_free(m);
886 return (0);
887 }
888
889 #ifndef vax
890 if (m->m_len % sizeof(long))
891 goto bad;
892 #endif
893 /*
894 * IP first-hop destination address will be stored before
895 * actual options; move other options back
896 * and clear it when none present.
897 */
898 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
899 goto bad;
900 cnt = m->m_len;
901 m->m_len += sizeof(struct in_addr);
902 cp = mtod(m, u_char *) + sizeof(struct in_addr);
903 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
904 bzero(mtod(m, caddr_t), sizeof(struct in_addr));
905
906 for (; cnt > 0; cnt -= optlen, cp += optlen) {
907 opt = cp[IPOPT_OPTVAL];
908 if (opt == IPOPT_EOL)
909 break;
910 if (opt == IPOPT_NOP)
911 optlen = 1;
912 else {
913 if (cnt < IPOPT_OLEN + sizeof(*cp))
914 goto bad;
915 optlen = cp[IPOPT_OLEN];
916 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
917 goto bad;
918 }
919 switch (opt) {
920
921 default:
922 break;
923
924 case IPOPT_LSRR:
925 case IPOPT_SSRR:
926 /*
927 * user process specifies route as:
928 * ->A->B->C->D
929 * D must be our final destination (but we can't
930 * check that since we may not have connected yet).
931 * A is first hop destination, which doesn't appear in
932 * actual IP option, but is stored before the options.
933 */
934 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
935 goto bad;
936 m->m_len -= sizeof(struct in_addr);
937 cnt -= sizeof(struct in_addr);
938 optlen -= sizeof(struct in_addr);
939 cp[IPOPT_OLEN] = optlen;
940 /*
941 * Move first hop before start of options.
942 */
943 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
944 sizeof(struct in_addr));
945 /*
946 * Then copy rest of options back
947 * to close up the deleted entry.
948 */
949 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
950 sizeof(struct in_addr)),
951 (caddr_t)&cp[IPOPT_OFFSET+1],
952 (unsigned)cnt + sizeof(struct in_addr));
953 break;
954 }
955 }
956 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
957 goto bad;
958 *pcbopt = m;
959 return (0);
960
961 bad:
962 (void)m_free(m);
963 return (EINVAL);
964 }
965
966 /*
967 * Set the IP multicast options in response to user setsockopt().
968 */
969 static int
970 ip_setmoptions(optname, imop, m)
971 int optname;
972 struct ip_moptions **imop;
973 struct mbuf *m;
974 {
975 register int error = 0;
976 u_char loop;
977 register int i;
978 struct in_addr addr;
979 register struct ip_mreq *mreq;
980 register struct ifnet *ifp;
981 register struct ip_moptions *imo = *imop;
982 struct route ro;
983 register struct sockaddr_in *dst;
984 int s;
985
986 if (imo == NULL) {
987 /*
988 * No multicast option buffer attached to the pcb;
989 * allocate one and initialize to default values.
990 */
991 imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS,
992 M_WAITOK);
993
994 if (imo == NULL)
995 return (ENOBUFS);
996 *imop = imo;
997 imo->imo_multicast_ifp = NULL;
998 imo->imo_multicast_vif = -1;
999 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1000 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1001 imo->imo_num_memberships = 0;
1002 }
1003
1004 switch (optname) {
1005 /* store an index number for the vif you wanna use in the send */
1006 case IP_MULTICAST_VIF:
1007 if (!legal_vif_num) {
1008 error = EOPNOTSUPP;
1009 break;
1010 }
1011 if (m == NULL || m->m_len != sizeof(int)) {
1012 error = EINVAL;
1013 break;
1014 }
1015 i = *(mtod(m, int *));
1016 if (!legal_vif_num(i) && (i != -1)) {
1017 error = EINVAL;
1018 break;
1019 }
1020 imo->imo_multicast_vif = i;
1021 break;
1022
1023 case IP_MULTICAST_IF:
1024 /*
1025 * Select the interface for outgoing multicast packets.
1026 */
1027 if (m == NULL || m->m_len != sizeof(struct in_addr)) {
1028 error = EINVAL;
1029 break;
1030 }
1031 addr = *(mtod(m, struct in_addr *));
1032 /*
1033 * INADDR_ANY is used to remove a previous selection.
1034 * When no interface is selected, a default one is
1035 * chosen every time a multicast packet is sent.
1036 */
1037 if (addr.s_addr == INADDR_ANY) {
1038 imo->imo_multicast_ifp = NULL;
1039 break;
1040 }
1041 /*
1042 * The selected interface is identified by its local
1043 * IP address. Find the interface and confirm that
1044 * it supports multicasting.
1045 */
1046 s = splimp();
1047 INADDR_TO_IFP(addr, ifp);
1048 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1049 splx(s);
1050 error = EADDRNOTAVAIL;
1051 break;
1052 }
1053 imo->imo_multicast_ifp = ifp;
1054 splx(s);
1055 break;
1056
1057 case IP_MULTICAST_TTL:
1058 /*
1059 * Set the IP time-to-live for outgoing multicast packets.
1060 */
1061 if (m == NULL || m->m_len != 1) {
1062 error = EINVAL;
1063 break;
1064 }
1065 imo->imo_multicast_ttl = *(mtod(m, u_char *));
1066 break;
1067
1068 case IP_MULTICAST_LOOP:
1069 /*
1070 * Set the loopback flag for outgoing multicast packets.
1071 * Must be zero or one.
1072 */
1073 if (m == NULL || m->m_len != 1 ||
1074 (loop = *(mtod(m, u_char *))) > 1) {
1075 error = EINVAL;
1076 break;
1077 }
1078 imo->imo_multicast_loop = loop;
1079 break;
1080
1081 case IP_ADD_MEMBERSHIP:
1082 /*
1083 * Add a multicast group membership.
1084 * Group must be a valid IP multicast address.
1085 */
1086 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
1087 error = EINVAL;
1088 break;
1089 }
1090 mreq = mtod(m, struct ip_mreq *);
1091 if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
1092 error = EINVAL;
1093 break;
1094 }
1095 s = splimp();
1096 /*
1097 * If no interface address was provided, use the interface of
1098 * the route to the given multicast address.
1099 */
1100 if (mreq->imr_interface.s_addr == INADDR_ANY) {
1101 bzero((caddr_t)&ro, sizeof(ro));
1102 dst = (struct sockaddr_in *)&ro.ro_dst;
1103 dst->sin_len = sizeof(*dst);
1104 dst->sin_family = AF_INET;
1105 dst->sin_addr = mreq->imr_multiaddr;
1106 rtalloc(&ro);
1107 if (ro.ro_rt == NULL) {
1108 error = EADDRNOTAVAIL;
1109 splx(s);
1110 break;
1111 }
1112 ifp = ro.ro_rt->rt_ifp;
1113 rtfree(ro.ro_rt);
1114 }
1115 else {
1116 INADDR_TO_IFP(mreq->imr_interface, ifp);
1117 }
1118
1119 /*
1120 * See if we found an interface, and confirm that it
1121 * supports multicast.
1122 */
1123 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1124 error = EADDRNOTAVAIL;
1125 splx(s);
1126 break;
1127 }
1128 /*
1129 * See if the membership already exists or if all the
1130 * membership slots are full.
1131 */
1132 for (i = 0; i < imo->imo_num_memberships; ++i) {
1133 if (imo->imo_membership[i]->inm_ifp == ifp &&
1134 imo->imo_membership[i]->inm_addr.s_addr
1135 == mreq->imr_multiaddr.s_addr)
1136 break;
1137 }
1138 if (i < imo->imo_num_memberships) {
1139 error = EADDRINUSE;
1140 splx(s);
1141 break;
1142 }
1143 if (i == IP_MAX_MEMBERSHIPS) {
1144 error = ETOOMANYREFS;
1145 splx(s);
1146 break;
1147 }
1148 /*
1149 * Everything looks good; add a new record to the multicast
1150 * address list for the given interface.
1151 */
1152 if ((imo->imo_membership[i] =
1153 in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
1154 error = ENOBUFS;
1155 splx(s);
1156 break;
1157 }
1158 ++imo->imo_num_memberships;
1159 splx(s);
1160 break;
1161
1162 case IP_DROP_MEMBERSHIP:
1163 /*
1164 * Drop a multicast group membership.
1165 * Group must be a valid IP multicast address.
1166 */
1167 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
1168 error = EINVAL;
1169 break;
1170 }
1171 mreq = mtod(m, struct ip_mreq *);
1172 if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
1173 error = EINVAL;
1174 break;
1175 }
1176
1177 s = splimp();
1178 /*
1179 * If an interface address was specified, get a pointer
1180 * to its ifnet structure.
1181 */
1182 if (mreq->imr_interface.s_addr == INADDR_ANY)
1183 ifp = NULL;
1184 else {
1185 INADDR_TO_IFP(mreq->imr_interface, ifp);
1186 if (ifp == NULL) {
1187 error = EADDRNOTAVAIL;
1188 splx(s);
1189 break;
1190 }
1191 }
1192 /*
1193 * Find the membership in the membership array.
1194 */
1195 for (i = 0; i < imo->imo_num_memberships; ++i) {
1196 if ((ifp == NULL ||
1197 imo->imo_membership[i]->inm_ifp == ifp) &&
1198 imo->imo_membership[i]->inm_addr.s_addr ==
1199 mreq->imr_multiaddr.s_addr)
1200 break;
1201 }
1202 if (i == imo->imo_num_memberships) {
1203 error = EADDRNOTAVAIL;
1204 splx(s);
1205 break;
1206 }
1207 /*
1208 * Give up the multicast address record to which the
1209 * membership points.
1210 */
1211 in_delmulti(imo->imo_membership[i]);
1212 /*
1213 * Remove the gap in the membership array.
1214 */
1215 for (++i; i < imo->imo_num_memberships; ++i)
1216 imo->imo_membership[i-1] = imo->imo_membership[i];
1217 --imo->imo_num_memberships;
1218 splx(s);
1219 break;
1220
1221 default:
1222 error = EOPNOTSUPP;
1223 break;
1224 }
1225
1226 /*
1227 * If all options have default values, no need to keep the mbuf.
1228 */
1229 if (imo->imo_multicast_ifp == NULL &&
1230 imo->imo_multicast_vif == -1 &&
1231 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
1232 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
1233 imo->imo_num_memberships == 0) {
1234 free(*imop, M_IPMOPTS);
1235 *imop = NULL;
1236 }
1237
1238 return (error);
1239 }
1240
1241 /*
1242 * Return the IP multicast options in response to user getsockopt().
1243 */
1244 static int
1245 ip_getmoptions(optname, imo, mp)
1246 int optname;
1247 register struct ip_moptions *imo;
1248 register struct mbuf **mp;
1249 {
1250 u_char *ttl;
1251 u_char *loop;
1252 struct in_addr *addr;
1253 struct in_ifaddr *ia;
1254
1255 *mp = m_get(M_WAIT, MT_SOOPTS);
1256
1257 switch (optname) {
1258
1259 case IP_MULTICAST_VIF:
1260 if (imo != NULL)
1261 *(mtod(*mp, int *)) = imo->imo_multicast_vif;
1262 else
1263 *(mtod(*mp, int *)) = -1;
1264 (*mp)->m_len = sizeof(int);
1265 return(0);
1266
1267 case IP_MULTICAST_IF:
1268 addr = mtod(*mp, struct in_addr *);
1269 (*mp)->m_len = sizeof(struct in_addr);
1270 if (imo == NULL || imo->imo_multicast_ifp == NULL)
1271 addr->s_addr = INADDR_ANY;
1272 else {
1273 IFP_TO_IA(imo->imo_multicast_ifp, ia);
1274 addr->s_addr = (ia == NULL) ? INADDR_ANY
1275 : IA_SIN(ia)->sin_addr.s_addr;
1276 }
1277 return (0);
1278
1279 case IP_MULTICAST_TTL:
1280 ttl = mtod(*mp, u_char *);
1281 (*mp)->m_len = 1;
1282 *ttl = (imo == NULL) ? IP_DEFAULT_MULTICAST_TTL
1283 : imo->imo_multicast_ttl;
1284 return (0);
1285
1286 case IP_MULTICAST_LOOP:
1287 loop = mtod(*mp, u_char *);
1288 (*mp)->m_len = 1;
1289 *loop = (imo == NULL) ? IP_DEFAULT_MULTICAST_LOOP
1290 : imo->imo_multicast_loop;
1291 return (0);
1292
1293 default:
1294 return (EOPNOTSUPP);
1295 }
1296 }
1297
1298 /*
1299 * Discard the IP multicast options.
1300 */
1301 void
1302 ip_freemoptions(imo)
1303 register struct ip_moptions *imo;
1304 {
1305 register int i;
1306
1307 if (imo != NULL) {
1308 for (i = 0; i < imo->imo_num_memberships; ++i)
1309 in_delmulti(imo->imo_membership[i]);
1310 free(imo, M_IPMOPTS);
1311 }
1312 }
1313
1314 /*
1315 * Routine called from ip_output() to loop back a copy of an IP multicast
1316 * packet to the input queue of a specified interface. Note that this
1317 * calls the output routine of the loopback "driver", but with an interface
1318 * pointer that might NOT be a loopback interface -- evil, but easier than
1319 * replicating that code here.
1320 */
1321 static void
1322 ip_mloopback(ifp, m, dst, hlen)
1323 struct ifnet *ifp;
1324 register struct mbuf *m;
1325 register struct sockaddr_in *dst;
1326 int hlen;
1327 {
1328 register struct ip *ip;
1329 struct mbuf *copym;
1330
1331 copym = m_copy(m, 0, M_COPYALL);
1332 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
1333 copym = m_pullup(copym, hlen);
1334 if (copym != NULL) {
1335 /*
1336 * We don't bother to fragment if the IP length is greater
1337 * than the interface's MTU. Can this possibly matter?
1338 */
1339 ip = mtod(copym, struct ip *);
1340 ip->ip_len = htons((u_short)ip->ip_len);
1341 ip->ip_off = htons((u_short)ip->ip_off);
1342 ip->ip_sum = 0;
1343 if (ip->ip_vhl == IP_VHL_BORING) {
1344 ip->ip_sum = in_cksum_hdr(ip);
1345 } else {
1346 ip->ip_sum = in_cksum(copym, hlen);
1347 }
1348 /*
1349 * NB:
1350 * It's not clear whether there are any lingering
1351 * reentrancy problems in other areas which might
1352 * be exposed by using ip_input directly (in
1353 * particular, everything which modifies the packet
1354 * in-place). Yet another option is using the
1355 * protosw directly to deliver the looped back
1356 * packet. For the moment, we'll err on the side
1357 * of safety by continuing to abuse looutput().
1358 */
1359 #ifdef notdef
1360 copym->m_pkthdr.rcvif = ifp;
1361 ip_input(copym)
1362 #else
1363 (void) looutput(ifp, copym, (struct sockaddr *)dst, NULL);
1364 #endif
1365 }
1366 }
Cache object: 7f97202ff18bdde5b80b54d62912ff18
|