1 /*-
2 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the project nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * $KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $
30 */
31
32 /*-
33 * Copyright (c) 1982, 1986, 1988, 1990, 1993
34 * The Regents of the University of California. All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
61 */
62
63 #include <sys/cdefs.h>
64 __FBSDID("$FreeBSD: src/sys/netinet6/ip6_output.c,v 1.122 2008/10/15 19:24:18 bz Exp $");
65
66 #include "opt_inet.h"
67 #include "opt_inet6.h"
68 #include "opt_ipsec.h"
69
70 #include <sys/param.h>
71 #include <sys/kernel.h>
72 #include <sys/malloc.h>
73 #include <sys/mbuf.h>
74 #include <sys/errno.h>
75 #include <sys/priv.h>
76 #include <sys/proc.h>
77 #include <sys/protosw.h>
78 #include <sys/socket.h>
79 #include <sys/socketvar.h>
80 #include <sys/ucred.h>
81 #include <sys/vimage.h>
82
83 #include <net/if.h>
84 #include <net/netisr.h>
85 #include <net/route.h>
86 #include <net/pfil.h>
87
88 #include <netinet/in.h>
89 #include <netinet/in_var.h>
90 #include <netinet6/in6_var.h>
91 #include <netinet/ip6.h>
92 #include <netinet/icmp6.h>
93 #include <netinet6/ip6_var.h>
94 #include <netinet/in_pcb.h>
95 #include <netinet/tcp_var.h>
96 #include <netinet6/nd6.h>
97
98 #ifdef IPSEC
99 #include <netipsec/ipsec.h>
100 #include <netipsec/ipsec6.h>
101 #include <netipsec/key.h>
102 #include <netinet6/ip6_ipsec.h>
103 #endif /* IPSEC */
104
105 #include <netinet6/ip6protosw.h>
106 #include <netinet6/scope6_var.h>
107
108 static MALLOC_DEFINE(M_IP6MOPTS, "ip6_moptions", "internet multicast options");
109
/*
 * The mbufs that make up the header part of an outgoing packet: the one
 * holding the IPv6 header itself, plus one for each optional extension
 * header.  A NULL member means that header is not present.
 */
struct ip6_exthdrs {
	struct mbuf	*ip6e_ip6;	/* IPv6 header */
	struct mbuf	*ip6e_hbh;	/* hop-by-hop options header */
	struct mbuf	*ip6e_dest1;	/* destination options header (1st part) */
	struct mbuf	*ip6e_rthdr;	/* routing header */
	struct mbuf	*ip6e_dest2;	/* destination options header (2nd part) */
};
117
118 static int ip6_pcbopt __P((int, u_char *, int, struct ip6_pktopts **,
119 struct ucred *, int));
120 static int ip6_pcbopts __P((struct ip6_pktopts **, struct mbuf *,
121 struct socket *, struct sockopt *));
122 static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *);
123 static int ip6_setpktopt __P((int, u_char *, int, struct ip6_pktopts *,
124 struct ucred *, int, int, int));
125
126 static int ip6_setmoptions(int, struct ip6_moptions **, struct mbuf *);
127 static int ip6_getmoptions(int, struct ip6_moptions *, struct mbuf **);
128 static int ip6_copyexthdr(struct mbuf **, caddr_t, int);
129 static int ip6_insertfraghdr __P((struct mbuf *, struct mbuf *, int,
130 struct ip6_frag **));
131 static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
132 static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
133 static int ip6_getpmtu __P((struct route_in6 *, struct route_in6 *,
134 struct ifnet *, struct in6_addr *, u_long *, int *));
135 static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
136
137
/*
 * Make an extension header mbuf from option data.
 *   hp is the source: a pointer to the option data, which starts with a
 *      struct ip6_ext.  Nothing is done when it is NULL.
 *   mp is the destination: the address of the mbuf pointer handed to
 *      ip6_copyexthdr().
 *
 * The number of bytes copied is (ip6e_len + 1) << 3, taken from the
 * ip6e_len field of the source header.
 *
 * The macro must be expanded inside a function that has an int variable
 * named "error" and a label named "freehdrs": the result of
 * ip6_copyexthdr() is stored in error and, when it is non-zero, control
 * jumps to freehdrs.
 *
 * hp is evaluated exactly once, so an argument with side effects is safe.
 */
#define MAKE_EXTHDR(hp, mp)						\
    do {								\
	struct ip6_ext *mk_eh = (struct ip6_ext *)(hp);			\
									\
	if (mk_eh != NULL) {						\
		error = ip6_copyexthdr((mp), (caddr_t)mk_eh,		\
		    (mk_eh->ip6e_len + 1) << 3);			\
		if (error)						\
			goto freehdrs;					\
	}								\
    } while (/*CONSTCOND*/ 0)
152
/*
 * Link an extension header mbuf into the chain, right after mp.
 *   m  is the extension header mbuf to insert.  Nothing is done when it
 *      is NULL.
 *   mp is the previous mbuf in the chain (an lvalue); m is inserted after
 *      it and mp is then updated to m.
 *   p  is a pointer (an lvalue) to the "next header" byte of the header
 *      that precedes m.  The value it points to is copied into the first
 *      byte of m's data, *p is overwritten with i, and p is then updated
 *      to point at the first byte of m's data.
 *   i  is the protocol number identifying the header carried by m.
 *
 * The macro must be expanded where an "hdrsplit" variable is in scope; it
 * panics if hdrsplit is zero while there is a header to insert.
 *
 * m is evaluated exactly once, so an argument with side effects is safe.
 */
#define MAKE_CHAIN(m, mp, p, i)						\
    do {								\
	struct mbuf *mc_m = (m);					\
									\
	if (mc_m != NULL) {						\
		if (!hdrsplit)						\
			panic("assumption failed: hdr not split");	\
		*mtod(mc_m, u_char *) = *(p);				\
		*(p) = (i);						\
		(p) = mtod(mc_m, u_char *);				\
		mc_m->m_next = (mp)->m_next;				\
		(mp)->m_next = mc_m;					\
		(mp) = mc_m;						\
	}								\
    } while (/*CONSTCOND*/ 0)
173
174 /*
175 * IP6 output. The packet in mbuf chain m contains a skeletal IP6
176 * header (with pri, len, nxt, hlim, src, dst).
177 * This function may modify ver and hlim only.
178 * The mbuf chain containing the packet will be freed.
179 * The mbuf opt, if present, will not be freed.
180 *
181 * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
182 * nd_ifinfo.linkmtu is u_int32_t. so we use u_long to hold largest one,
183 * which is rt_rmx.rmx_mtu.
184 *
185 * ifpp - XXX: just for statistics
186 */
187 int
188 ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
189 struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
190 struct ifnet **ifpp, struct inpcb *inp)
191 {
192 INIT_VNET_NET(curvnet);
193 INIT_VNET_INET6(curvnet);
194 struct ip6_hdr *ip6, *mhip6;
195 struct ifnet *ifp, *origifp;
196 struct mbuf *m = m0;
197 struct mbuf *mprev = NULL;
198 int hlen, tlen, len, off;
199 struct route_in6 ip6route;
200 struct rtentry *rt = NULL;
201 struct sockaddr_in6 *dst, src_sa, dst_sa;
202 struct in6_addr odst;
203 int error = 0;
204 struct in6_ifaddr *ia = NULL;
205 u_long mtu;
206 int alwaysfrag, dontfrag;
207 u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
208 struct ip6_exthdrs exthdrs;
209 struct in6_addr finaldst, src0, dst0;
210 u_int32_t zone;
211 struct route_in6 *ro_pmtu = NULL;
212 int hdrsplit = 0;
213 int needipsec = 0;
214 #ifdef IPSEC
215 struct ipsec_output_state state;
216 struct ip6_rthdr *rh = NULL;
217 int needipsectun = 0;
218 int segleft_org = 0;
219 struct secpolicy *sp = NULL;
220 #endif /* IPSEC */
221
222 ip6 = mtod(m, struct ip6_hdr *);
223 if (ip6 == NULL) {
224 printf ("ip6 is NULL");
225 goto bad;
226 }
227
228 finaldst = ip6->ip6_dst;
229
230 bzero(&exthdrs, sizeof(exthdrs));
231
232 if (opt) {
233 /* Hop-by-Hop options header */
234 MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
235 /* Destination options header(1st part) */
236 if (opt->ip6po_rthdr) {
237 /*
238 * Destination options header(1st part)
239 * This only makes sense with a routing header.
240 * See Section 9.2 of RFC 3542.
241 * Disabling this part just for MIP6 convenience is
242 * a bad idea. We need to think carefully about a
243 * way to make the advanced API coexist with MIP6
244 * options, which might automatically be inserted in
245 * the kernel.
246 */
247 MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
248 }
249 /* Routing header */
250 MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
251 /* Destination options header(2nd part) */
252 MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
253 }
254
255 /*
256 * IPSec checking which handles several cases.
257 * FAST IPSEC: We re-injected the packet.
258 */
259 #ifdef IPSEC
260 switch(ip6_ipsec_output(&m, inp, &flags, &error, &ifp, &sp))
261 {
262 case 1: /* Bad packet */
263 goto freehdrs;
264 case -1: /* Do IPSec */
265 needipsec = 1;
266 case 0: /* No IPSec */
267 default:
268 break;
269 }
270 #endif /* IPSEC */
271
272 /*
273 * Calculate the total length of the extension header chain.
274 * Keep the length of the unfragmentable part for fragmentation.
275 */
276 optlen = 0;
277 if (exthdrs.ip6e_hbh)
278 optlen += exthdrs.ip6e_hbh->m_len;
279 if (exthdrs.ip6e_dest1)
280 optlen += exthdrs.ip6e_dest1->m_len;
281 if (exthdrs.ip6e_rthdr)
282 optlen += exthdrs.ip6e_rthdr->m_len;
283 unfragpartlen = optlen + sizeof(struct ip6_hdr);
284
285 /* NOTE: we don't add AH/ESP length here. do that later. */
286 if (exthdrs.ip6e_dest2)
287 optlen += exthdrs.ip6e_dest2->m_len;
288
289 /*
290 * If we need IPsec, or there is at least one extension header,
291 * separate IP6 header from the payload.
292 */
293 if ((needipsec || optlen) && !hdrsplit) {
294 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
295 m = NULL;
296 goto freehdrs;
297 }
298 m = exthdrs.ip6e_ip6;
299 hdrsplit++;
300 }
301
302 /* adjust pointer */
303 ip6 = mtod(m, struct ip6_hdr *);
304
305 /* adjust mbuf packet header length */
306 m->m_pkthdr.len += optlen;
307 plen = m->m_pkthdr.len - sizeof(*ip6);
308
309 /* If this is a jumbo payload, insert a jumbo payload option. */
310 if (plen > IPV6_MAXPACKET) {
311 if (!hdrsplit) {
312 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
313 m = NULL;
314 goto freehdrs;
315 }
316 m = exthdrs.ip6e_ip6;
317 hdrsplit++;
318 }
319 /* adjust pointer */
320 ip6 = mtod(m, struct ip6_hdr *);
321 if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
322 goto freehdrs;
323 ip6->ip6_plen = 0;
324 } else
325 ip6->ip6_plen = htons(plen);
326
327 /*
328 * Concatenate headers and fill in next header fields.
329 * Here we have, on "m"
330 * IPv6 payload
331 * and we insert headers accordingly. Finally, we should be getting:
332 * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
333 *
334 * during the header composing process, "m" points to IPv6 header.
335 * "mprev" points to an extension header prior to esp.
336 */
337 u_char *nexthdrp = &ip6->ip6_nxt;
338 mprev = m;
339
340 /*
341 * we treat dest2 specially. this makes IPsec processing
342 * much easier. the goal here is to make mprev point the
343 * mbuf prior to dest2.
344 *
345 * result: IPv6 dest2 payload
346 * m and mprev will point to IPv6 header.
347 */
348 if (exthdrs.ip6e_dest2) {
349 if (!hdrsplit)
350 panic("assumption failed: hdr not split");
351 exthdrs.ip6e_dest2->m_next = m->m_next;
352 m->m_next = exthdrs.ip6e_dest2;
353 *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
354 ip6->ip6_nxt = IPPROTO_DSTOPTS;
355 }
356
357 /*
358 * result: IPv6 hbh dest1 rthdr dest2 payload
359 * m will point to IPv6 header. mprev will point to the
360 * extension header prior to dest2 (rthdr in the above case).
361 */
362 MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
363 MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
364 IPPROTO_DSTOPTS);
365 MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
366 IPPROTO_ROUTING);
367
368 #ifdef IPSEC
369 if (!needipsec)
370 goto skip_ipsec2;
371
372 /*
373 * pointers after IPsec headers are not valid any more.
374 * other pointers need a great care too.
375 * (IPsec routines should not mangle mbufs prior to AH/ESP)
376 */
377 exthdrs.ip6e_dest2 = NULL;
378
379 if (exthdrs.ip6e_rthdr) {
380 rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
381 segleft_org = rh->ip6r_segleft;
382 rh->ip6r_segleft = 0;
383 }
384
385 bzero(&state, sizeof(state));
386 state.m = m;
387 error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
388 &needipsectun);
389 m = state.m;
390 if (error == EJUSTRETURN) {
391 /*
392 * We had a SP with a level of 'use' and no SA. We
393 * will just continue to process the packet without
394 * IPsec processing.
395 */
396 ;
397 } else if (error) {
398 /* mbuf is already reclaimed in ipsec6_output_trans. */
399 m = NULL;
400 switch (error) {
401 case EHOSTUNREACH:
402 case ENETUNREACH:
403 case EMSGSIZE:
404 case ENOBUFS:
405 case ENOMEM:
406 break;
407 default:
408 printf("[%s:%d] (ipsec): error code %d\n",
409 __func__, __LINE__, error);
410 /* FALLTHROUGH */
411 case ENOENT:
412 /* don't show these error codes to the user */
413 error = 0;
414 break;
415 }
416 goto bad;
417 } else if (!needipsectun) {
418 /*
419 * In the FAST IPSec case we have already
420 * re-injected the packet and it has been freed
421 * by the ipsec_done() function. So, just clean
422 * up after ourselves.
423 */
424 m = NULL;
425 goto done;
426 }
427 if (exthdrs.ip6e_rthdr) {
428 /* ah6_output doesn't modify mbuf chain */
429 rh->ip6r_segleft = segleft_org;
430 }
431 skip_ipsec2:;
432 #endif /* IPSEC */
433
434 /*
435 * If there is a routing header, replace the destination address field
436 * with the first hop of the routing header.
437 */
438 if (exthdrs.ip6e_rthdr) {
439 struct ip6_rthdr *rh =
440 (struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
441 struct ip6_rthdr *));
442 struct ip6_rthdr0 *rh0;
443 struct in6_addr *addr;
444 struct sockaddr_in6 sa;
445
446 switch (rh->ip6r_type) {
447 case IPV6_RTHDR_TYPE_0:
448 rh0 = (struct ip6_rthdr0 *)rh;
449 addr = (struct in6_addr *)(rh0 + 1);
450
451 /*
452 * construct a sockaddr_in6 form of
453 * the first hop.
454 *
455 * XXX: we may not have enough
456 * information about its scope zone;
457 * there is no standard API to pass
458 * the information from the
459 * application.
460 */
461 bzero(&sa, sizeof(sa));
462 sa.sin6_family = AF_INET6;
463 sa.sin6_len = sizeof(sa);
464 sa.sin6_addr = addr[0];
465 if ((error = sa6_embedscope(&sa,
466 V_ip6_use_defzone)) != 0) {
467 goto bad;
468 }
469 ip6->ip6_dst = sa.sin6_addr;
470 bcopy(&addr[1], &addr[0], sizeof(struct in6_addr)
471 * (rh0->ip6r0_segleft - 1));
472 addr[rh0->ip6r0_segleft - 1] = finaldst;
473 /* XXX */
474 in6_clearscope(addr + rh0->ip6r0_segleft - 1);
475 break;
476 default: /* is it possible? */
477 error = EINVAL;
478 goto bad;
479 }
480 }
481
482 /* Source address validation */
483 if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
484 (flags & IPV6_UNSPECSRC) == 0) {
485 error = EOPNOTSUPP;
486 V_ip6stat.ip6s_badscope++;
487 goto bad;
488 }
489 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
490 error = EOPNOTSUPP;
491 V_ip6stat.ip6s_badscope++;
492 goto bad;
493 }
494
495 V_ip6stat.ip6s_localout++;
496
497 /*
498 * Route packet.
499 */
500 if (ro == 0) {
501 ro = &ip6route;
502 bzero((caddr_t)ro, sizeof(*ro));
503 }
504 ro_pmtu = ro;
505 if (opt && opt->ip6po_rthdr)
506 ro = &opt->ip6po_route;
507 dst = (struct sockaddr_in6 *)&ro->ro_dst;
508
509 again:
510 /*
511 * if specified, try to fill in the traffic class field.
512 * do not override if a non-zero value is already set.
513 * we check the diffserv field and the ecn field separately.
514 */
515 if (opt && opt->ip6po_tclass >= 0) {
516 int mask = 0;
517
518 if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
519 mask |= 0xfc;
520 if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
521 mask |= 0x03;
522 if (mask != 0)
523 ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
524 }
525
526 /* fill in or override the hop limit field, if necessary. */
527 if (opt && opt->ip6po_hlim != -1)
528 ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
529 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
530 if (im6o != NULL)
531 ip6->ip6_hlim = im6o->im6o_multicast_hlim;
532 else
533 ip6->ip6_hlim = V_ip6_defmcasthlim;
534 }
535
536 #ifdef IPSEC
537 /*
538 * We may re-inject packets into the stack here.
539 */
540 if (needipsec && needipsectun) {
541 struct ipsec_output_state state;
542
543 /*
544 * All the extension headers will become inaccessible
545 * (since they can be encrypted).
546 * Don't panic, we need no more updates to extension headers
547 * on inner IPv6 packet (since they are now encapsulated).
548 *
549 * IPv6 [ESP|AH] IPv6 [extension headers] payload
550 */
551 bzero(&exthdrs, sizeof(exthdrs));
552 exthdrs.ip6e_ip6 = m;
553
554 bzero(&state, sizeof(state));
555 state.m = m;
556 state.ro = (struct route *)ro;
557 state.dst = (struct sockaddr *)dst;
558
559 error = ipsec6_output_tunnel(&state, sp, flags);
560
561 m = state.m;
562 ro = (struct route_in6 *)state.ro;
563 dst = (struct sockaddr_in6 *)state.dst;
564 if (error == EJUSTRETURN) {
565 /*
566 * We had a SP with a level of 'use' and no SA. We
567 * will just continue to process the packet without
568 * IPsec processing.
569 */
570 ;
571 } else if (error) {
572 /* mbuf is already reclaimed in ipsec6_output_tunnel. */
573 m0 = m = NULL;
574 m = NULL;
575 switch (error) {
576 case EHOSTUNREACH:
577 case ENETUNREACH:
578 case EMSGSIZE:
579 case ENOBUFS:
580 case ENOMEM:
581 break;
582 default:
583 printf("[%s:%d] (ipsec): error code %d\n",
584 __func__, __LINE__, error);
585 /* FALLTHROUGH */
586 case ENOENT:
587 /* don't show these error codes to the user */
588 error = 0;
589 break;
590 }
591 goto bad;
592 } else {
593 /*
594 * In the FAST IPSec case we have already
595 * re-injected the packet and it has been freed
596 * by the ipsec_done() function. So, just clean
597 * up after ourselves.
598 */
599 m = NULL;
600 goto done;
601 }
602
603 exthdrs.ip6e_ip6 = m;
604 }
605 #endif /* IPSEC */
606
607 /* adjust pointer */
608 ip6 = mtod(m, struct ip6_hdr *);
609
610 bzero(&dst_sa, sizeof(dst_sa));
611 dst_sa.sin6_family = AF_INET6;
612 dst_sa.sin6_len = sizeof(dst_sa);
613 dst_sa.sin6_addr = ip6->ip6_dst;
614 if ((error = in6_selectroute(&dst_sa, opt, im6o, ro,
615 &ifp, &rt, 0)) != 0) {
616 switch (error) {
617 case EHOSTUNREACH:
618 V_ip6stat.ip6s_noroute++;
619 break;
620 case EADDRNOTAVAIL:
621 default:
622 break; /* XXX statistics? */
623 }
624 if (ifp != NULL)
625 in6_ifstat_inc(ifp, ifs6_out_discard);
626 goto bad;
627 }
628 if (rt == NULL) {
629 /*
630 * If in6_selectroute() does not return a route entry,
631 * dst may not have been updated.
632 */
633 *dst = dst_sa; /* XXX */
634 }
635
636 /*
637 * then rt (for unicast) and ifp must be non-NULL valid values.
638 */
639 if ((flags & IPV6_FORWARDING) == 0) {
640 /* XXX: the FORWARDING flag can be set for mrouting. */
641 in6_ifstat_inc(ifp, ifs6_out_request);
642 }
643 if (rt != NULL) {
644 ia = (struct in6_ifaddr *)(rt->rt_ifa);
645 rt->rt_use++;
646 }
647
648 /*
649 * The outgoing interface must be in the zone of source and
650 * destination addresses. We should use ia_ifp to support the
651 * case of sending packets to an address of our own.
652 */
653 if (ia != NULL && ia->ia_ifp)
654 origifp = ia->ia_ifp;
655 else
656 origifp = ifp;
657
658 src0 = ip6->ip6_src;
659 if (in6_setscope(&src0, origifp, &zone))
660 goto badscope;
661 bzero(&src_sa, sizeof(src_sa));
662 src_sa.sin6_family = AF_INET6;
663 src_sa.sin6_len = sizeof(src_sa);
664 src_sa.sin6_addr = ip6->ip6_src;
665 if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id)
666 goto badscope;
667
668 dst0 = ip6->ip6_dst;
669 if (in6_setscope(&dst0, origifp, &zone))
670 goto badscope;
671 /* re-initialize to be sure */
672 bzero(&dst_sa, sizeof(dst_sa));
673 dst_sa.sin6_family = AF_INET6;
674 dst_sa.sin6_len = sizeof(dst_sa);
675 dst_sa.sin6_addr = ip6->ip6_dst;
676 if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id) {
677 goto badscope;
678 }
679
680 /* scope check is done. */
681 goto routefound;
682
683 badscope:
684 V_ip6stat.ip6s_badscope++;
685 in6_ifstat_inc(origifp, ifs6_out_discard);
686 if (error == 0)
687 error = EHOSTUNREACH; /* XXX */
688 goto bad;
689
690 routefound:
691 if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
692 if (opt && opt->ip6po_nextroute.ro_rt) {
693 /*
694 * The nexthop is explicitly specified by the
695 * application. We assume the next hop is an IPv6
696 * address.
697 */
698 dst = (struct sockaddr_in6 *)opt->ip6po_nexthop;
699 }
700 else if ((rt->rt_flags & RTF_GATEWAY))
701 dst = (struct sockaddr_in6 *)rt->rt_gateway;
702 }
703
704 if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
705 m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */
706 } else {
707 struct in6_multi *in6m;
708
709 m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
710
711 in6_ifstat_inc(ifp, ifs6_out_mcast);
712
713 /*
714 * Confirm that the outgoing interface supports multicast.
715 */
716 if (!(ifp->if_flags & IFF_MULTICAST)) {
717 V_ip6stat.ip6s_noroute++;
718 in6_ifstat_inc(ifp, ifs6_out_discard);
719 error = ENETUNREACH;
720 goto bad;
721 }
722 IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
723 if (in6m != NULL &&
724 (im6o == NULL || im6o->im6o_multicast_loop)) {
725 /*
726 * If we belong to the destination multicast group
727 * on the outgoing interface, and the caller did not
728 * forbid loopback, loop back a copy.
729 */
730 ip6_mloopback(ifp, m, dst);
731 } else {
732 /*
733 * If we are acting as a multicast router, perform
734 * multicast forwarding as if the packet had just
735 * arrived on the interface to which we are about
736 * to send. The multicast forwarding function
737 * recursively calls this function, using the
738 * IPV6_FORWARDING flag to prevent infinite recursion.
739 *
740 * Multicasts that are looped back by ip6_mloopback(),
741 * above, will be forwarded by the ip6_input() routine,
742 * if necessary.
743 */
744 if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
745 /*
746 * XXX: ip6_mforward expects that rcvif is NULL
747 * when it is called from the originating path.
748 * However, it is not always the case, since
749 * some versions of MGETHDR() does not
750 * initialize the field.
751 */
752 m->m_pkthdr.rcvif = NULL;
753 if (ip6_mforward(ip6, ifp, m) != 0) {
754 m_freem(m);
755 goto done;
756 }
757 }
758 }
759 /*
760 * Multicasts with a hoplimit of zero may be looped back,
761 * above, but must not be transmitted on a network.
762 * Also, multicasts addressed to the loopback interface
763 * are not sent -- the above call to ip6_mloopback() will
764 * loop back a copy if this host actually belongs to the
765 * destination group on the loopback interface.
766 */
767 if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
768 IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
769 m_freem(m);
770 goto done;
771 }
772 }
773
774 /*
775 * Fill the outgoing inteface to tell the upper layer
776 * to increment per-interface statistics.
777 */
778 if (ifpp)
779 *ifpp = ifp;
780
781 /* Determine path MTU. */
782 if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
783 &alwaysfrag)) != 0)
784 goto bad;
785
786 /*
787 * The caller of this function may specify to use the minimum MTU
788 * in some cases.
789 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
790 * setting. The logic is a bit complicated; by default, unicast
791 * packets will follow path MTU while multicast packets will be sent at
792 * the minimum MTU. If IP6PO_MINMTU_ALL is specified, all packets
793 * including unicast ones will be sent at the minimum MTU. Multicast
794 * packets will always be sent at the minimum MTU unless
795 * IP6PO_MINMTU_DISABLE is explicitly specified.
796 * See RFC 3542 for more details.
797 */
798 if (mtu > IPV6_MMTU) {
799 if ((flags & IPV6_MINMTU))
800 mtu = IPV6_MMTU;
801 else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
802 mtu = IPV6_MMTU;
803 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
804 (opt == NULL ||
805 opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
806 mtu = IPV6_MMTU;
807 }
808 }
809
810 /*
811 * clear embedded scope identifiers if necessary.
812 * in6_clearscope will touch the addresses only when necessary.
813 */
814 in6_clearscope(&ip6->ip6_src);
815 in6_clearscope(&ip6->ip6_dst);
816
817 /*
818 * If the outgoing packet contains a hop-by-hop options header,
819 * it must be examined and processed even by the source node.
820 * (RFC 2460, section 4.)
821 */
822 if (exthdrs.ip6e_hbh) {
823 struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
824 u_int32_t dummy; /* XXX unused */
825 u_int32_t plen = 0; /* XXX: ip6_process will check the value */
826
827 #ifdef DIAGNOSTIC
828 if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
829 panic("ip6e_hbh is not continuous");
830 #endif
831 /*
832 * XXX: if we have to send an ICMPv6 error to the sender,
833 * we need the M_LOOP flag since icmp6_error() expects
834 * the IPv6 and the hop-by-hop options header are
835 * continuous unless the flag is set.
836 */
837 m->m_flags |= M_LOOP;
838 m->m_pkthdr.rcvif = ifp;
839 if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
840 ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
841 &dummy, &plen) < 0) {
842 /* m was already freed at this point */
843 error = EINVAL;/* better error? */
844 goto done;
845 }
846 m->m_flags &= ~M_LOOP; /* XXX */
847 m->m_pkthdr.rcvif = NULL;
848 }
849
850 /* Jump over all PFIL processing if hooks are not active. */
851 if (!PFIL_HOOKED(&inet6_pfil_hook))
852 goto passout;
853
854 odst = ip6->ip6_dst;
855 /* Run through list of hooks for output packets. */
856 error = pfil_run_hooks(&inet6_pfil_hook, &m, ifp, PFIL_OUT, inp);
857 if (error != 0 || m == NULL)
858 goto done;
859 ip6 = mtod(m, struct ip6_hdr *);
860
861 /* See if destination IP address was changed by packet filter. */
862 if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) {
863 m->m_flags |= M_SKIP_FIREWALL;
864 /* If destination is now ourself drop to ip6_input(). */
865 if (in6_localaddr(&ip6->ip6_dst)) {
866 if (m->m_pkthdr.rcvif == NULL)
867 m->m_pkthdr.rcvif = V_loif;
868 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
869 m->m_pkthdr.csum_flags |=
870 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
871 m->m_pkthdr.csum_data = 0xffff;
872 }
873 m->m_pkthdr.csum_flags |=
874 CSUM_IP_CHECKED | CSUM_IP_VALID;
875 error = netisr_queue(NETISR_IPV6, m);
876 goto done;
877 } else
878 goto again; /* Redo the routing table lookup. */
879 }
880
881 /* XXX: IPFIREWALL_FORWARD */
882
883 passout:
884 /*
885 * Send the packet to the outgoing interface.
886 * If necessary, do IPv6 fragmentation before sending.
887 *
888 * the logic here is rather complex:
889 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
890 * 1-a: send as is if tlen <= path mtu
891 * 1-b: fragment if tlen > path mtu
892 *
893 * 2: if user asks us not to fragment (dontfrag == 1)
894 * 2-a: send as is if tlen <= interface mtu
895 * 2-b: error if tlen > interface mtu
896 *
897 * 3: if we always need to attach fragment header (alwaysfrag == 1)
898 * always fragment
899 *
900 * 4: if dontfrag == 1 && alwaysfrag == 1
901 * error, as we cannot handle this conflicting request
902 */
903 tlen = m->m_pkthdr.len;
904
905 if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
906 dontfrag = 1;
907 else
908 dontfrag = 0;
909 if (dontfrag && alwaysfrag) { /* case 4 */
910 /* conflicting request - can't transmit */
911 error = EMSGSIZE;
912 goto bad;
913 }
914 if (dontfrag && tlen > IN6_LINKMTU(ifp)) { /* case 2-b */
915 /*
916 * Even if the DONTFRAG option is specified, we cannot send the
917 * packet when the data length is larger than the MTU of the
918 * outgoing interface.
919 * Notify the error by sending IPV6_PATHMTU ancillary data as
920 * well as returning an error code (the latter is not described
921 * in the API spec.)
922 */
923 u_int32_t mtu32;
924 struct ip6ctlparam ip6cp;
925
926 mtu32 = (u_int32_t)mtu;
927 bzero(&ip6cp, sizeof(ip6cp));
928 ip6cp.ip6c_cmdarg = (void *)&mtu32;
929 pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
930 (void *)&ip6cp);
931
932 error = EMSGSIZE;
933 goto bad;
934 }
935
936 /*
937 * transmit packet without fragmentation
938 */
939 if (dontfrag || (!alwaysfrag && tlen <= mtu)) { /* case 1-a and 2-a */
940 struct in6_ifaddr *ia6;
941
942 ip6 = mtod(m, struct ip6_hdr *);
943 ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
944 if (ia6) {
945 /* Record statistics for this interface address. */
946 ia6->ia_ifa.if_opackets++;
947 ia6->ia_ifa.if_obytes += m->m_pkthdr.len;
948 }
949 error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
950 goto done;
951 }
952
953 /*
954 * try to fragment the packet. case 1-b and 3
955 */
956 if (mtu < IPV6_MMTU) {
957 /* path MTU cannot be less than IPV6_MMTU */
958 error = EMSGSIZE;
959 in6_ifstat_inc(ifp, ifs6_out_fragfail);
960 goto bad;
961 } else if (ip6->ip6_plen == 0) {
962 /* jumbo payload cannot be fragmented */
963 error = EMSGSIZE;
964 in6_ifstat_inc(ifp, ifs6_out_fragfail);
965 goto bad;
966 } else {
967 struct mbuf **mnext, *m_frgpart;
968 struct ip6_frag *ip6f;
969 u_int32_t id = htonl(ip6_randomid());
970 u_char nextproto;
971
972 int qslots = ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len;
973
974 /*
975 * Too large for the destination or interface;
976 * fragment if possible.
977 * Must be able to put at least 8 bytes per fragment.
978 */
|