[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet6/ip6_output.c

Version: -  FREEBSD  -  FREEBSD7  -  FREEBSD70  -  FREEBSD6  -  FREEBSD63  -  FREEBSD62  -  FREEBSD61  -  FREEBSD60  -  FREEBSD5  -  FREEBSD55  -  FREEBSD54  -  FREEBSD53  -  FREEBSD52  -  FREEBSD51  -  FREEBSD50  -  FREEBSD4  -  FREEBSD3  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  OPENSOLARIS  -  minix-3-1-1  -  TRUSTEDBSD-SEBSD  -  FREEBSD-LIBC  -  FREEBSD7-LIBC  -  FREEBSD6-LIBC  -  GLIBC27 
SearchContext: -  none  -  excerpts  -  bigexcerpts 

  1 /*-
  2  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  3  * All rights reserved.
  4  *
  5  * Redistribution and use in source and binary forms, with or without
  6  * modification, are permitted provided that the following conditions
  7  * are met:
  8  * 1. Redistributions of source code must retain the above copyright
  9  *    notice, this list of conditions and the following disclaimer.
 10  * 2. Redistributions in binary form must reproduce the above copyright
 11  *    notice, this list of conditions and the following disclaimer in the
 12  *    documentation and/or other materials provided with the distribution.
 13  * 3. Neither the name of the project nor the names of its contributors
 14  *    may be used to endorse or promote products derived from this software
 15  *    without specific prior written permission.
 16  *
 17  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
 18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
 21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 27  * SUCH DAMAGE.
 28  *
 29  *      $KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $
 30  */
 31 
 32 /*-
 33  * Copyright (c) 1982, 1986, 1988, 1990, 1993
 34  *      The Regents of the University of California.  All rights reserved.
 35  *
 36  * Redistribution and use in source and binary forms, with or without
 37  * modification, are permitted provided that the following conditions
 38  * are met:
 39  * 1. Redistributions of source code must retain the above copyright
 40  *    notice, this list of conditions and the following disclaimer.
 41  * 2. Redistributions in binary form must reproduce the above copyright
 42  *    notice, this list of conditions and the following disclaimer in the
 43  *    documentation and/or other materials provided with the distribution.
 44  * 4. Neither the name of the University nor the names of its contributors
 45  *    may be used to endorse or promote products derived from this software
 46  *    without specific prior written permission.
 47  *
 48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 58  * SUCH DAMAGE.
 59  *
 60  *      @(#)ip_output.c 8.3 (Berkeley) 1/21/94
 61  */
 62 
 63 #include <sys/cdefs.h>
 64 __FBSDID("$FreeBSD: src/sys/netinet6/ip6_output.c,v 1.122 2008/10/15 19:24:18 bz Exp $");
 65 
 66 #include "opt_inet.h"
 67 #include "opt_inet6.h"
 68 #include "opt_ipsec.h"
 69 
 70 #include <sys/param.h>
 71 #include <sys/kernel.h>
 72 #include <sys/malloc.h>
 73 #include <sys/mbuf.h>
 74 #include <sys/errno.h>
 75 #include <sys/priv.h>
 76 #include <sys/proc.h>
 77 #include <sys/protosw.h>
 78 #include <sys/socket.h>
 79 #include <sys/socketvar.h>
 80 #include <sys/ucred.h>
 81 #include <sys/vimage.h>
 82 
 83 #include <net/if.h>
 84 #include <net/netisr.h>
 85 #include <net/route.h>
 86 #include <net/pfil.h>
 87 
 88 #include <netinet/in.h>
 89 #include <netinet/in_var.h>
 90 #include <netinet6/in6_var.h>
 91 #include <netinet/ip6.h>
 92 #include <netinet/icmp6.h>
 93 #include <netinet6/ip6_var.h>
 94 #include <netinet/in_pcb.h>
 95 #include <netinet/tcp_var.h>
 96 #include <netinet6/nd6.h>
 97 
 98 #ifdef IPSEC
 99 #include <netipsec/ipsec.h>
100 #include <netipsec/ipsec6.h>
101 #include <netipsec/key.h>
102 #include <netinet6/ip6_ipsec.h>
103 #endif /* IPSEC */
104 
105 #include <netinet6/ip6protosw.h>
106 #include <netinet6/scope6_var.h>
107 
108 static MALLOC_DEFINE(M_IP6MOPTS, "ip6_moptions", "internet multicast options");
109 
110 struct ip6_exthdrs {        /* mbufs for the headers ip6_output() puts together */
111         struct mbuf *ip6e_ip6;          /* IPv6 header; ip6_output() uses it as m after ip6_splithdr() */
112         struct mbuf *ip6e_hbh;          /* Hop-by-Hop options header */
113         struct mbuf *ip6e_dest1;        /* Destination options header (1st part) */
114         struct mbuf *ip6e_rthdr;        /* Routing header */
115         struct mbuf *ip6e_dest2;        /* Destination options header (2nd part) */
116 };
117 
118 static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **,
119                            struct ucred *, int);
120 static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *,
121         struct socket *, struct sockopt *);
122 static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *);
123 static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *,
124         struct ucred *, int, int, int);
125 
126 static int ip6_setmoptions(int, struct ip6_moptions **, struct mbuf *);
127 static int ip6_getmoptions(int, struct ip6_moptions *, struct mbuf **);
128 static int ip6_copyexthdr(struct mbuf **, caddr_t, int);        /* used by MAKE_EXTHDR */
129 static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int,
130         struct ip6_frag **);
131 static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t); /* called from ip6_output() */
132 static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);    /* called from ip6_output() */
133 static int ip6_getpmtu(struct route_in6 *, struct route_in6 *,
134         struct ifnet *, struct in6_addr *, u_long *, int *);
135 static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
136 
137 
138 /*
139  * Make an extension header from option data.  hp is the source, and
140  * mp is the destination.
     *
     * When hp is non-NULL it is read as a struct ip6_ext and handed to
     * ip6_copyexthdr() together with mp and a length of
     * (ip6e_len + 1) << 3, i.e. (ip6e_len + 1) * 8.  A NULL hp is a no-op.
     *
     * The expansion is not self-contained: it assigns the result to a
     * variable named `error' and does `goto freehdrs' when that is non-zero,
     * so both must be in scope wherever the macro is used.
141  */
142 #define MAKE_EXTHDR(hp, mp)                                             \
143     do {                                                                \
144         if (hp) {                                                       \
145                 struct ip6_ext *eh = (struct ip6_ext *)(hp);            \
146                 error = ip6_copyexthdr((mp), (caddr_t)(hp),             \
147                     ((eh)->ip6e_len + 1) << 3);                         \
148                 if (error)                                              \
149                         goto freehdrs;                                  \
150         }                                                               \
151     } while (/*CONSTCOND*/ 0)
152 
153 /*
154  * Form a chain of extension headers.
155  * m is the extension header mbuf (nothing is done when it is NULL)
156  * mp is the previous mbuf in the chain; m is linked in right after it
157  * p is a u_char pointer to the next-header byte of the preceding header
158  * i is the type of option, i.e. the value stored through p for m.
     *
     * Sequence: the first byte of m's data receives the old *p, *p is set
     * to i, and p is re-aimed at that first byte of m.  Then m is spliced
     * in after mp and mp is advanced to m.
     *
     * p and mp are assigned to, so both must be modifiable lvalues, and m
     * is evaluated several times.  The expansion also reads a variable
     * named `hdrsplit' from the enclosing scope and panics if it is zero.
159  */
160 #define MAKE_CHAIN(m, mp, p, i)\
161     do {\
162         if (m) {\
163                 if (!hdrsplit) \
164                         panic("assumption failed: hdr not split"); \
165                 *mtod((m), u_char *) = *(p);\
166                 *(p) = (i);\
167                 p = mtod((m), u_char *);\
168                 (m)->m_next = (mp)->m_next;\
169                 (mp)->m_next = (m);\
170                 (mp) = (m);\
171         }\
172     } while (/*CONSTCOND*/ 0)
173 
174 /*
175  * IP6 output. The packet in mbuf chain m contains a skeletal IP6
176  * header (with pri, len, nxt, hlim, src, dst).
177  * This function may modify ver and hlim only.
178  * The mbuf chain containing the packet will be freed.
179  * The mbuf opt, if present, will not be freed.
180  *
181  * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
182  * nd_ifinfo.linkmtu is u_int32_t.  so we use u_long to hold largest one,
183  * which is rt_rmx.rmx_mtu.
184  *
185  * ifpp - XXX: just for statistics
186  */
187 int
188 ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
189     struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
190     struct ifnet **ifpp, struct inpcb *inp)
191 {
192         INIT_VNET_NET(curvnet);
193         INIT_VNET_INET6(curvnet);
194         struct ip6_hdr *ip6, *mhip6;
195         struct ifnet *ifp, *origifp;
196         struct mbuf *m = m0;
197         struct mbuf *mprev = NULL;
198         int hlen, tlen, len, off;
199         struct route_in6 ip6route;
200         struct rtentry *rt = NULL;
201         struct sockaddr_in6 *dst, src_sa, dst_sa;
202         struct in6_addr odst;
203         int error = 0;
204         struct in6_ifaddr *ia = NULL;
205         u_long mtu;
206         int alwaysfrag, dontfrag;
207         u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
208         struct ip6_exthdrs exthdrs;
209         struct in6_addr finaldst, src0, dst0;
210         u_int32_t zone;
211         struct route_in6 *ro_pmtu = NULL;
212         int hdrsplit = 0;
213         int needipsec = 0;
214 #ifdef IPSEC
215         struct ipsec_output_state state;
216         struct ip6_rthdr *rh = NULL;
217         int needipsectun = 0;
218         int segleft_org = 0;
219         struct secpolicy *sp = NULL;
220 #endif /* IPSEC */
221 
222         ip6 = mtod(m, struct ip6_hdr *);
223         if (ip6 == NULL) {
224                 printf ("ip6 is NULL");
225                 goto bad;
226         }
227 
228         finaldst = ip6->ip6_dst;
229 
230         bzero(&exthdrs, sizeof(exthdrs));
231 
232         if (opt) {
233                 /* Hop-by-Hop options header */
234                 MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
235                 /* Destination options header(1st part) */
236                 if (opt->ip6po_rthdr) {
237                         /*
238                          * Destination options header(1st part)
239                          * This only makes sense with a routing header.
240                          * See Section 9.2 of RFC 3542.
241                          * Disabling this part just for MIP6 convenience is
242                          * a bad idea.  We need to think carefully about a
243                          * way to make the advanced API coexist with MIP6
244                          * options, which might automatically be inserted in
245                          * the kernel.
246                          */
247                         MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
248                 }
249                 /* Routing header */
250                 MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
251                 /* Destination options header(2nd part) */
252                 MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
253         }
254 
255         /*
256          * IPSec checking which handles several cases.
257          * FAST IPSEC: We re-injected the packet.
258          */
259 #ifdef IPSEC
260         switch(ip6_ipsec_output(&m, inp, &flags, &error, &ifp, &sp))
261         {
262         case 1:                 /* Bad packet */
263                 goto freehdrs;
264         case -1:                /* Do IPSec */
265                 needipsec = 1;
266         case 0:                 /* No IPSec */
267         default:
268                 break;
269         }
270 #endif /* IPSEC */
271 
272         /*
273          * Calculate the total length of the extension header chain.
274          * Keep the length of the unfragmentable part for fragmentation.
275          */
276         optlen = 0;
277         if (exthdrs.ip6e_hbh)
278                 optlen += exthdrs.ip6e_hbh->m_len;
279         if (exthdrs.ip6e_dest1)
280                 optlen += exthdrs.ip6e_dest1->m_len;
281         if (exthdrs.ip6e_rthdr)
282                 optlen += exthdrs.ip6e_rthdr->m_len;
283         unfragpartlen = optlen + sizeof(struct ip6_hdr);
284 
285         /* NOTE: we don't add AH/ESP length here. do that later. */
286         if (exthdrs.ip6e_dest2)
287                 optlen += exthdrs.ip6e_dest2->m_len;
288 
289         /*
290          * If we need IPsec, or there is at least one extension header,
291          * separate IP6 header from the payload.
292          */
293         if ((needipsec || optlen) && !hdrsplit) {
294                 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
295                         m = NULL;
296                         goto freehdrs;
297                 }
298                 m = exthdrs.ip6e_ip6;
299                 hdrsplit++;
300         }
301 
302         /* adjust pointer */
303         ip6 = mtod(m, struct ip6_hdr *);
304 
305         /* adjust mbuf packet header length */
306         m->m_pkthdr.len += optlen;
307         plen = m->m_pkthdr.len - sizeof(*ip6);
308 
309         /* If this is a jumbo payload, insert a jumbo payload option. */
310         if (plen > IPV6_MAXPACKET) {
311                 if (!hdrsplit) {
312                         if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
313                                 m = NULL;
314                                 goto freehdrs;
315                         }
316                         m = exthdrs.ip6e_ip6;
317                         hdrsplit++;
318                 }
319                 /* adjust pointer */
320                 ip6 = mtod(m, struct ip6_hdr *);
321                 if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
322                         goto freehdrs;
323                 ip6->ip6_plen = 0;
324         } else
325                 ip6->ip6_plen = htons(plen);
326 
327         /*
328          * Concatenate headers and fill in next header fields.
329          * Here we have, on "m"
330          *      IPv6 payload
331          * and we insert headers accordingly.  Finally, we should be getting:
332          *      IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
333          *
334          * during the header composing process, "m" points to IPv6 header.
335          * "mprev" points to an extension header prior to esp.
336          */
337         u_char *nexthdrp = &ip6->ip6_nxt;
338         mprev = m;
339 
340         /*
341          * we treat dest2 specially.  this makes IPsec processing
342          * much easier.  the goal here is to make mprev point the
343          * mbuf prior to dest2.
344          *
345          * result: IPv6 dest2 payload
346          * m and mprev will point to IPv6 header.
347          */
348         if (exthdrs.ip6e_dest2) {
349                 if (!hdrsplit)
350                         panic("assumption failed: hdr not split");
351                 exthdrs.ip6e_dest2->m_next = m->m_next;
352                 m->m_next = exthdrs.ip6e_dest2;
353                 *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
354                 ip6->ip6_nxt = IPPROTO_DSTOPTS;
355         }
356 
357         /*
358          * result: IPv6 hbh dest1 rthdr dest2 payload
359          * m will point to IPv6 header.  mprev will point to the
360          * extension header prior to dest2 (rthdr in the above case).
361          */
362         MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
363         MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
364                    IPPROTO_DSTOPTS);
365         MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
366                    IPPROTO_ROUTING);
367 
368 #ifdef IPSEC
369         if (!needipsec)
370                 goto skip_ipsec2;
371 
372         /*
373          * pointers after IPsec headers are not valid any more.
374          * other pointers need a great care too.
375          * (IPsec routines should not mangle mbufs prior to AH/ESP)
376          */
377         exthdrs.ip6e_dest2 = NULL;
378 
379         if (exthdrs.ip6e_rthdr) {
380                 rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
381                 segleft_org = rh->ip6r_segleft;
382                 rh->ip6r_segleft = 0;
383         }
384 
385         bzero(&state, sizeof(state));
386         state.m = m;
387         error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
388                                     &needipsectun);
389         m = state.m;
390         if (error == EJUSTRETURN) {
391                 /*
392                  * We had a SP with a level of 'use' and no SA. We
393                  * will just continue to process the packet without
394                  * IPsec processing.
395                  */
396                 ;
397         } else if (error) {
398                 /* mbuf is already reclaimed in ipsec6_output_trans. */
399                 m = NULL;
400                 switch (error) {
401                 case EHOSTUNREACH:
402                 case ENETUNREACH:
403                 case EMSGSIZE:
404                 case ENOBUFS:
405                 case ENOMEM:
406                         break;
407                 default:
408                         printf("[%s:%d] (ipsec): error code %d\n",
409                             __func__, __LINE__, error);
410                         /* FALLTHROUGH */
411                 case ENOENT:
412                         /* don't show these error codes to the user */
413                         error = 0;
414                         break;
415                 }
416                 goto bad;
417         } else if (!needipsectun) {
418                 /*
419                  * In the FAST IPSec case we have already
420                  * re-injected the packet and it has been freed
421                  * by the ipsec_done() function.  So, just clean
422                  * up after ourselves.
423                  */
424                 m = NULL;
425                 goto done;
426         }
427         if (exthdrs.ip6e_rthdr) {
428                 /* ah6_output doesn't modify mbuf chain */
429                 rh->ip6r_segleft = segleft_org;
430         }
431 skip_ipsec2:;
432 #endif /* IPSEC */
433 
434         /*
435          * If there is a routing header, replace the destination address field
436          * with the first hop of the routing header.
437          */
438         if (exthdrs.ip6e_rthdr) {
439                 struct ip6_rthdr *rh =
440                         (struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
441                                                   struct ip6_rthdr *));
442                 struct ip6_rthdr0 *rh0;
443                 struct in6_addr *addr;
444                 struct sockaddr_in6 sa;
445 
446                 switch (rh->ip6r_type) {
447                 case IPV6_RTHDR_TYPE_0:
448                          rh0 = (struct ip6_rthdr0 *)rh;
449                          addr = (struct in6_addr *)(rh0 + 1);
450 
451                          /*
452                           * construct a sockaddr_in6 form of
453                           * the first hop.
454                           *
455                           * XXX: we may not have enough
456                           * information about its scope zone;
457                           * there is no standard API to pass
458                           * the information from the
459                           * application.
460                           */
461                          bzero(&sa, sizeof(sa));
462                          sa.sin6_family = AF_INET6;
463                          sa.sin6_len = sizeof(sa);
464                          sa.sin6_addr = addr[0];
465                          if ((error = sa6_embedscope(&sa,
466                              V_ip6_use_defzone)) != 0) {
467                                  goto bad;
468                          }
469                          ip6->ip6_dst = sa.sin6_addr;
470                          bcopy(&addr[1], &addr[0], sizeof(struct in6_addr)
471                              * (rh0->ip6r0_segleft - 1));
472                          addr[rh0->ip6r0_segleft - 1] = finaldst;
473                          /* XXX */
474                          in6_clearscope(addr + rh0->ip6r0_segleft - 1);
475                          break;
476                 default:        /* is it possible? */
477                          error = EINVAL;
478                          goto bad;
479                 }
480         }
481 
482         /* Source address validation */
483         if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
484             (flags & IPV6_UNSPECSRC) == 0) {
485                 error = EOPNOTSUPP;
486                 V_ip6stat.ip6s_badscope++;
487                 goto bad;
488         }
489         if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
490                 error = EOPNOTSUPP;
491                 V_ip6stat.ip6s_badscope++;
492                 goto bad;
493         }
494 
495         V_ip6stat.ip6s_localout++;
496 
497         /*
498          * Route packet.
499          */
500         if (ro == 0) {
501                 ro = &ip6route;
502                 bzero((caddr_t)ro, sizeof(*ro));
503         }
504         ro_pmtu = ro;
505         if (opt && opt->ip6po_rthdr)
506                 ro = &opt->ip6po_route;
507         dst = (struct sockaddr_in6 *)&ro->ro_dst;
508 
509 again:
510         /*
511          * if specified, try to fill in the traffic class field.
512          * do not override if a non-zero value is already set.
513          * we check the diffserv field and the ecn field separately.
514          */
515         if (opt && opt->ip6po_tclass >= 0) {
516                 int mask = 0;
517 
518                 if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
519                         mask |= 0xfc;
520                 if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
521                         mask |= 0x03;
522                 if (mask != 0)
523                         ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
524         }
525 
526         /* fill in or override the hop limit field, if necessary. */
527         if (opt && opt->ip6po_hlim != -1)
528                 ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
529         else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
530                 if (im6o != NULL)
531                         ip6->ip6_hlim = im6o->im6o_multicast_hlim;
532                 else
533                         ip6->ip6_hlim = V_ip6_defmcasthlim;
534         }
535 
536 #ifdef IPSEC
537         /*
538          * We may re-inject packets into the stack here.
539          */
540         if (needipsec && needipsectun) {
541                 struct ipsec_output_state state;
542 
543                 /*
544                  * All the extension headers will become inaccessible
545                  * (since they can be encrypted).
546                  * Don't panic, we need no more updates to extension headers
547                  * on inner IPv6 packet (since they are now encapsulated).
548                  *
549                  * IPv6 [ESP|AH] IPv6 [extension headers] payload
550                  */
551                 bzero(&exthdrs, sizeof(exthdrs));
552                 exthdrs.ip6e_ip6 = m;
553 
554                 bzero(&state, sizeof(state));
555                 state.m = m;
556                 state.ro = (struct route *)ro;
557                 state.dst = (struct sockaddr *)dst;
558 
559                 error = ipsec6_output_tunnel(&state, sp, flags);
560 
561                 m = state.m;
562                 ro = (struct route_in6 *)state.ro;
563                 dst = (struct sockaddr_in6 *)state.dst;
564                 if (error == EJUSTRETURN) {
565                         /*
566                          * We had a SP with a level of 'use' and no SA. We
567                          * will just continue to process the packet without
568                          * IPsec processing.
569                          */
570                         ;
571                 } else if (error) {
572                         /* mbuf is already reclaimed in ipsec6_output_tunnel. */
573                         m0 = m = NULL;
574                         m = NULL;
575                         switch (error) {
576                         case EHOSTUNREACH:
577                         case ENETUNREACH:
578                         case EMSGSIZE:
579                         case ENOBUFS:
580                         case ENOMEM:
581                                 break;
582                         default:
583                                 printf("[%s:%d] (ipsec): error code %d\n",
584                                     __func__, __LINE__, error);
585                                 /* FALLTHROUGH */
586                         case ENOENT:
587                                 /* don't show these error codes to the user */
588                                 error = 0;
589                                 break;
590                         }
591                         goto bad;
592                 } else {
593                         /*
594                          * In the FAST IPSec case we have already
595                          * re-injected the packet and it has been freed
596                          * by the ipsec_done() function.  So, just clean
597                          * up after ourselves.
598                          */
599                         m = NULL;
600                         goto done;
601                 }
602 
603                 exthdrs.ip6e_ip6 = m;
604         }
605 #endif /* IPSEC */
606 
607         /* adjust pointer */
608         ip6 = mtod(m, struct ip6_hdr *);
609 
610         bzero(&dst_sa, sizeof(dst_sa));
611         dst_sa.sin6_family = AF_INET6;
612         dst_sa.sin6_len = sizeof(dst_sa);
613         dst_sa.sin6_addr = ip6->ip6_dst;
614         if ((error = in6_selectroute(&dst_sa, opt, im6o, ro,
615             &ifp, &rt, 0)) != 0) {
616                 switch (error) {
617                 case EHOSTUNREACH:
618                         V_ip6stat.ip6s_noroute++;
619                         break;
620                 case EADDRNOTAVAIL:
621                 default:
622                         break; /* XXX statistics? */
623                 }
624                 if (ifp != NULL)
625                         in6_ifstat_inc(ifp, ifs6_out_discard);
626                 goto bad;
627         }
628         if (rt == NULL) {
629                 /*
630                  * If in6_selectroute() does not return a route entry,
631                  * dst may not have been updated.
632                  */
633                 *dst = dst_sa;  /* XXX */
634         }
635 
636         /*
637          * then rt (for unicast) and ifp must be non-NULL valid values.
638          */
639         if ((flags & IPV6_FORWARDING) == 0) {
640                 /* XXX: the FORWARDING flag can be set for mrouting. */
641                 in6_ifstat_inc(ifp, ifs6_out_request);
642         }
643         if (rt != NULL) {
644                 ia = (struct in6_ifaddr *)(rt->rt_ifa);
645                 rt->rt_use++;
646         }
647 
648         /*
649          * The outgoing interface must be in the zone of source and
650          * destination addresses.  We should use ia_ifp to support the
651          * case of sending packets to an address of our own.
652          */
653         if (ia != NULL && ia->ia_ifp)
654                 origifp = ia->ia_ifp;
655         else
656                 origifp = ifp;
657 
658         src0 = ip6->ip6_src;
659         if (in6_setscope(&src0, origifp, &zone))
660                 goto badscope;
661         bzero(&src_sa, sizeof(src_sa));
662         src_sa.sin6_family = AF_INET6;
663         src_sa.sin6_len = sizeof(src_sa);
664         src_sa.sin6_addr = ip6->ip6_src;
665         if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id)
666                 goto badscope;
667 
668         dst0 = ip6->ip6_dst;
669         if (in6_setscope(&dst0, origifp, &zone))
670                 goto badscope;
671         /* re-initialize to be sure */
672         bzero(&dst_sa, sizeof(dst_sa));
673         dst_sa.sin6_family = AF_INET6;
674         dst_sa.sin6_len = sizeof(dst_sa);
675         dst_sa.sin6_addr = ip6->ip6_dst;
676         if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id) {
677                 goto badscope;
678         }
679 
680         /* scope check is done. */
681         goto routefound;
682 
683   badscope:
684         V_ip6stat.ip6s_badscope++;
685         in6_ifstat_inc(origifp, ifs6_out_discard);
686         if (error == 0)
687                 error = EHOSTUNREACH; /* XXX */
688         goto bad;
689 
690   routefound:
691         if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
692                 if (opt && opt->ip6po_nextroute.ro_rt) {
693                         /*
694                          * The nexthop is explicitly specified by the
695                          * application.  We assume the next hop is an IPv6
696                          * address.
697                          */
698                         dst = (struct sockaddr_in6 *)opt->ip6po_nexthop;
699                 }
700                 else if ((rt->rt_flags & RTF_GATEWAY))
701                         dst = (struct sockaddr_in6 *)rt->rt_gateway;
702         }
703 
704         if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
705                 m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */
706         } else {
707                 struct  in6_multi *in6m;
708 
709                 m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
710 
711                 in6_ifstat_inc(ifp, ifs6_out_mcast);
712 
713                 /*
714                  * Confirm that the outgoing interface supports multicast.
715                  */
716                 if (!(ifp->if_flags & IFF_MULTICAST)) {
717                         V_ip6stat.ip6s_noroute++;
718                         in6_ifstat_inc(ifp, ifs6_out_discard);
719                         error = ENETUNREACH;
720                         goto bad;
721                 }
722                 IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
723                 if (in6m != NULL &&
724                    (im6o == NULL || im6o->im6o_multicast_loop)) {
725                         /*
726                          * If we belong to the destination multicast group
727                          * on the outgoing interface, and the caller did not
728                          * forbid loopback, loop back a copy.
729                          */
730                         ip6_mloopback(ifp, m, dst);
731                 } else {
732                         /*
733                          * If we are acting as a multicast router, perform
734                          * multicast forwarding as if the packet had just
735                          * arrived on the interface to which we are about
736                          * to send.  The multicast forwarding function
737                          * recursively calls this function, using the
738                          * IPV6_FORWARDING flag to prevent infinite recursion.
739                          *
740                          * Multicasts that are looped back by ip6_mloopback(),
741                          * above, will be forwarded by the ip6_input() routine,
742                          * if necessary.
743                          */
744                         if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
745                                 /*
746                                  * XXX: ip6_mforward expects that rcvif is NULL
747                                  * when it is called from the originating path.
748                                  * However, it is not always the case, since
749                                  * some versions of MGETHDR() do not
750                                  * initialize the field.
751                                  */
752                                 m->m_pkthdr.rcvif = NULL;
753                                 if (ip6_mforward(ip6, ifp, m) != 0) {
754                                         m_freem(m);
755                                         goto done;
756                                 }
757                         }
758                 }
759                 /*
760                  * Multicasts with a hoplimit of zero may be looped back,
761                  * above, but must not be transmitted on a network.
762                  * Also, multicasts addressed to the loopback interface
763                  * are not sent -- the above call to ip6_mloopback() will
764                  * loop back a copy if this host actually belongs to the
765                  * destination group on the loopback interface.
766                  */
767                 if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
768                     IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
769                         m_freem(m);
770                         goto done;
771                 }
772         }
773 
774         /*
775          * Fill the outgoing interface to tell the upper layer
776          * to increment per-interface statistics.
777          */
778         if (ifpp)
779                 *ifpp = ifp;
780 
781         /* Determine path MTU. */
782         if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
783             &alwaysfrag)) != 0)
784                 goto bad;
785 
786         /*
787          * The caller of this function may specify to use the minimum MTU
788          * in some cases.
789          * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
790          * setting.  The logic is a bit complicated; by default, unicast
791          * packets will follow path MTU while multicast packets will be sent at
792          * the minimum MTU.  If IP6PO_MINMTU_ALL is specified, all packets
793          * including unicast ones will be sent at the minimum MTU.  Multicast
794          * packets will always be sent at the minimum MTU unless
795          * IP6PO_MINMTU_DISABLE is explicitly specified.
796          * See RFC 3542 for more details.
797          */
798         if (mtu > IPV6_MMTU) {
799                 if ((flags & IPV6_MINMTU))
800                         mtu = IPV6_MMTU;
801                 else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
802                         mtu = IPV6_MMTU;
803                 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
804                          (opt == NULL ||
805                           opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
806                         mtu = IPV6_MMTU;
807                 }
808         }
809 
810         /*
811          * clear embedded scope identifiers if necessary.
812          * in6_clearscope will touch the addresses only when necessary.
813          */
814         in6_clearscope(&ip6->ip6_src);
815         in6_clearscope(&ip6->ip6_dst);
816 
817         /*
818          * If the outgoing packet contains a hop-by-hop options header,
819          * it must be examined and processed even by the source node.
820          * (RFC 2460, section 4.)
821          */
822         if (exthdrs.ip6e_hbh) {
823                 struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
824                 u_int32_t dummy; /* XXX unused */
825                 u_int32_t plen = 0; /* XXX: ip6_process will check the value */
826 
827 #ifdef DIAGNOSTIC
828                 if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
829                         panic("ip6e_hbh is not continuous");
830 #endif
831                 /*
832                  *  XXX: if we have to send an ICMPv6 error to the sender,
833                  *       we need the M_LOOP flag since icmp6_error() expects
834                  *       the IPv6 header and the hop-by-hop options header
835                  *       to be contiguous unless the flag is set.
836                  */
837                 m->m_flags |= M_LOOP;
838                 m->m_pkthdr.rcvif = ifp;
839                 if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
840                     ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
841                     &dummy, &plen) < 0) {
842                         /* m was already freed at this point */
843                         error = EINVAL;/* better error? */
844                         goto done;
845                 }
846                 m->m_flags &= ~M_LOOP; /* XXX */
847                 m->m_pkthdr.rcvif = NULL;
848         }
849 
850         /* Jump over all PFIL processing if hooks are not active. */
851         if (!PFIL_HOOKED(&inet6_pfil_hook))
852                 goto passout;
853 
854         odst = ip6->ip6_dst;
855         /* Run through list of hooks for output packets. */
856         error = pfil_run_hooks(&inet6_pfil_hook, &m, ifp, PFIL_OUT, inp);
857         if (error != 0 || m == NULL)
858                 goto done;
859         ip6 = mtod(m, struct ip6_hdr *);
860 
861         /* See if destination IP address was changed by packet filter. */
862         if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) {
863                 m->m_flags |= M_SKIP_FIREWALL;
864                 /* If destination is now ourself drop to ip6_input(). */
865                 if (in6_localaddr(&ip6->ip6_dst)) {
866                         if (m->m_pkthdr.rcvif == NULL)
867                                 m->m_pkthdr.rcvif = V_loif;
868                         if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
869                                 m->m_pkthdr.csum_flags |=
870                                     CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
871                                 m->m_pkthdr.csum_data = 0xffff;
872                         }
873                         m->m_pkthdr.csum_flags |=
874                             CSUM_IP_CHECKED | CSUM_IP_VALID;
875                         error = netisr_queue(NETISR_IPV6, m);
876                         goto done;
877                 } else
878                         goto again;     /* Redo the routing table lookup. */
879         }
880 
881         /* XXX: IPFIREWALL_FORWARD */
882 
883 passout:
884         /*
885          * Send the packet to the outgoing interface.
886          * If necessary, do IPv6 fragmentation before sending.
887          *
888          * the logic here is rather complex:
889          * 1: normal case (dontfrag == 0, alwaysfrag == 0)
890          * 1-a: send as is if tlen <= path mtu
891          * 1-b: fragment if tlen > path mtu
892          *
893          * 2: if user asks us not to fragment (dontfrag == 1)
894          * 2-a: send as is if tlen <= interface mtu
895          * 2-b: error if tlen > interface mtu
896          *
897          * 3: if we always need to attach fragment header (alwaysfrag == 1)
898          *      always fragment
899          *
900          * 4: if dontfrag == 1 && alwaysfrag == 1
901          *      error, as we cannot handle this conflicting request
902          */
903         tlen = m->m_pkthdr.len;
904 
905         if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
906                 dontfrag = 1;
907         else
908                 dontfrag = 0;
909         if (dontfrag && alwaysfrag) {   /* case 4 */
910                 /* conflicting request - can't transmit */
911                 error = EMSGSIZE;
912                 goto bad;
913         }
914         if (dontfrag && tlen > IN6_LINKMTU(ifp)) {      /* case 2-b */
915                 /*
916                  * Even if the DONTFRAG option is specified, we cannot send the
917                  * packet when the data length is larger than the MTU of the
918                  * outgoing interface.
919                  * Notify the error by sending IPV6_PATHMTU ancillary data as
920                  * well as returning an error code (the latter is not described
921                  * in the API spec.)
922                  */
923                 u_int32_t mtu32;
924                 struct ip6ctlparam ip6cp;
925 
926                 mtu32 = (u_int32_t)mtu;
927                 bzero(&ip6cp, sizeof(ip6cp));
928                 ip6cp.ip6c_cmdarg = (void *)&mtu32;
929                 pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
930                     (void *)&ip6cp);
931 
932                 error = EMSGSIZE;
933                 goto bad;
934         }
935 
936         /*
937          * transmit packet without fragmentation
938          */
939         if (dontfrag || (!alwaysfrag && tlen <= mtu)) { /* case 1-a and 2-a */
940                 struct in6_ifaddr *ia6;
941 
942                 ip6 = mtod(m, struct ip6_hdr *);
943                 ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
944                 if (ia6) {
945                         /* Record statistics for this interface address. */
946                         ia6->ia_ifa.if_opackets++;
947                         ia6->ia_ifa.if_obytes += m->m_pkthdr.len;
948                 }
949                 error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
950                 goto done;
951         }
952 
953         /*
954          * try to fragment the packet.  case 1-b and 3
955          */
956         if (mtu < IPV6_MMTU) {
957                 /* path MTU cannot be less than IPV6_MMTU */
958                 error = EMSGSIZE;
959                 in6_ifstat_inc(ifp, ifs6_out_fragfail);
960                 goto bad;
961         } else if (ip6->ip6_plen == 0) {
962                 /* jumbo payload cannot be fragmented */
963                 error = EMSGSIZE;
964                 in6_ifstat_inc(ifp, ifs6_out_fragfail);
965                 goto bad;
966         } else {
967                 struct mbuf **mnext, *m_frgpart;
968                 struct ip6_frag *ip6f;
969                 u_int32_t id = htonl(ip6_randomid());
970                 u_char nextproto;
971 
972                 int qslots = ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len;
973 
974                 /*
975                  * Too large for the destination or interface;
976                  * fragment if possible.
977                  * Must be able to put at least 8 bytes per fragment.
978                  */